diff --git a/_clang-format b/_clang-format index 37a50f367..aef7d8e0f 100644 --- a/_clang-format +++ b/_clang-format @@ -21,31 +21,31 @@ Language: Cpp AccessModifierOffset: -4 AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignConsecutiveMacros: true +AlignConsecutiveAssignments: + Enabled: false +AlignConsecutiveDeclarations: + Enabled: false +AlignConsecutiveMacros: + Enabled: true AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: false +AlignOperands: Align +AlignTrailingComments: + Kind: Never AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: false AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false +AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: true AllowShortFunctionsOnASingleLine: Empty AllowShortIfStatementsOnASingleLine: Never AllowShortLambdasOnASingleLine: Inline AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: Yes BinPackArguments: true BinPackParameters: true BraceWrapping: AfterCaseLabel: false AfterClass: false - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false AfterFunction: false AfterNamespace: false @@ -59,46 +59,33 @@ BraceWrapping: SplitEmptyFunction: false SplitEmptyRecord: false SplitEmptyNamespace: false +BreakAfterReturnType: Automatic BreakBeforeBinaryOperators: None BreakBeforeBraces: Custom BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon BreakInheritanceList: BeforeColon BreakStringLiterals: false +BreakTemplateDeclarations: Yes ColumnLimit: 100 -CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: true ConstructorInitializerIndentWidth: 8 ContinuationIndentWidth: 4 Cpp11BracedListStyle: false -DerivePointerAlignment: true 
+DerivePointerAlignment: false DisableFormat: false FixNamespaceComments: true -ForEachMacros: - - foreach - - Q_FOREACH - - BOOST_FOREACH -IncludeBlocks: Preserve -IncludeCategories: - - Regex: '^' - Priority: 2 - - Regex: '^<.*\.h>' - Priority: 1 - - Regex: '^<.*' - Priority: 2 - - Regex: '.*' - Priority: 3 -IncludeIsMainRegex: '([-_](test|unittest))?$' IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 4 IndentWrappedFunctionNames: false -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' +KeepEmptyLines: + AtEndOfFile: false + AtStartOfBlock: false + AtStartOfFile: false MaxEmptyLinesToKeep: 1 NamespaceIndentation: None +PackConstructorInitializers: CurrentLine PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 @@ -108,57 +95,24 @@ PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left -RawStringFormats: - - Language: Cpp - Delimiters: - - cc - - CC - - cpp - - Cpp - - CPP - - 'c++' - - 'C++' - CanonicalDelimiter: '' - BasedOnStyle: google - - Language: TextProto - Delimiters: - - pb - - PB - - proto - - PROTO - EnclosingFunctions: - - EqualsProto - - EquivToProto - - PARSE_PARTIAL_TEXT_PROTO - - PARSE_TEST_PROTO - - PARSE_TEXT_PROTO - - ParseTextOrDie - - ParseTextProtoOrDie - CanonicalDelimiter: '' - BasedOnStyle: google ReflowComments: false -SortIncludes: false -SortUsingDeclarations: false +SortIncludes: Never +SortUsingDeclarations: Never SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 -SpacesInAngles: false +SpacesInAngles: Never SpacesInContainerLiterals: 
false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false +SpacesInParens: Never SpacesInSquareBrackets: false -Standard: Cpp11 -StatementMacros: - - Q_UNUSED - - QT_REQUIRE_VERSION -TabWidth: 1 +Standard: c++17 UseTab: Never ... diff --git a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp index 358c0b768..c6beb44da 100644 --- a/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp +++ b/examples/blas/compile_time_dispatching/level3/gemm_usm_mklcpu_cublas.cpp @@ -61,7 +61,7 @@ // // is performed and finally the results are post processed. // -void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) { +void run_gemm_example(const sycl::device& cpu_dev, const sycl::device& gpu_dev) { // // Initialize data for Gemm // @@ -89,11 +89,11 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) // Catch asynchronous exceptions for CPU and GPU auto cpu_exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught asynchronous SYCL exception on CPU device during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; @@ -102,11 +102,11 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) std::exit(2); }; auto gpu_exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught asynchronous SYCL exception on GPU device during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; @@ -141,9 +141,9 @@ void 
run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) sycl::context cpu_cxt = cpu_queue.get_context(); // allocate on CPU device and copy data from host to SYCL CPU device - float *cpu_A = sycl::malloc_device(sizea * sizeof(float), cpu_queue); - float *cpu_B = sycl::malloc_device(sizeb * sizeof(float), cpu_queue); - float *cpu_C = sycl::malloc_device(sizec * sizeof(float), cpu_queue); + float* cpu_A = sycl::malloc_device(sizea * sizeof(float), cpu_queue); + float* cpu_B = sycl::malloc_device(sizeb * sizeof(float), cpu_queue); + float* cpu_C = sycl::malloc_device(sizec * sizeof(float), cpu_queue); if (!cpu_A || !cpu_B || !cpu_C) { throw std::runtime_error("Failed to allocate USM memory."); } @@ -159,9 +159,9 @@ void run_gemm_example(const sycl::device &cpu_dev, const sycl::device &gpu_dev) sycl::context gpu_cxt = gpu_queue.get_context(); // allocate on GPU device and copy data from host to SYCL GPU device - float *gpu_A = sycl::malloc_device(sizea * sizeof(float), gpu_queue); - float *gpu_B = sycl::malloc_device(sizeb * sizeof(float), gpu_queue); - float *gpu_C = sycl::malloc_device(sizec * sizeof(float), gpu_queue); + float* gpu_A = sycl::malloc_device(sizea * sizeof(float), gpu_queue); + float* gpu_B = sycl::malloc_device(sizeb * sizeof(float), gpu_queue); + float* gpu_C = sycl::malloc_device(sizec * sizeof(float), gpu_queue); if (!gpu_A || !gpu_B || !gpu_C) { throw std::runtime_error("Failed to allocate USM memory."); } @@ -260,7 +260,7 @@ void print_example_banner() { // // Main entry point for example. 
// -int main(int argc, char **argv) { +int main(int argc, char** argv) { print_example_banner(); try { @@ -279,13 +279,13 @@ int main(int argc, char **argv) { run_gemm_example(cpu_dev, gpu_dev); std::cout << "BLAS GEMM USM example ran OK on MKLCPU and CUBLAS" << std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during GEMM:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during GEMM:"; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/examples/include/example_helper.hpp b/examples/include/example_helper.hpp index 4a89e6fae..c5da54acf 100644 --- a/examples/include/example_helper.hpp +++ b/examples/include/example_helper.hpp @@ -88,7 +88,7 @@ fp rand_scalar() { } template -void rand_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld) { +void rand_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; if (trans == oneapi::mkl::transpose::nontrans) { @@ -104,7 +104,7 @@ void rand_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld) { } template -intType generate_sparse_matrix(const intType nx, intType *ia, intType *ja, fp *a, +intType generate_sparse_matrix(const intType nx, intType* ia, intType* ja, fp* a, const intType index = 0) { intType nz = nx, ny = nx; intType nnz = 0; @@ -172,7 +172,7 @@ bool check_result(fp res, fp ref, intType nFlops, intType index) { } template -void free_vec(std::vector &ptr_vec, sycl::queue queue) { +void free_vec(std::vector& ptr_vec, sycl::queue queue) { for (auto ptr : ptr_vec) { sycl::free(ptr, queue); } diff --git a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp 
b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp index 4ab078601..964afb49b 100644 --- a/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp +++ b/examples/sparse_blas/compile_time_dispatching/sparse_blas_spmv_usm_mklcpu.cpp @@ -60,7 +60,7 @@ // is performed and finally the results are post processed. // template -int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { +int run_sparse_matrix_vector_multiply_example(const sycl::device& cpu_dev) { // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -71,11 +71,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Catch asynchronous exceptions auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL " "exception during sparse::spmv:\n" << e.what() << std::endl; @@ -94,12 +94,12 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { std::size_t sizeia = static_cast(nrows + 1); std::size_t sizevec = static_cast(nrows); - ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), cpu_queue); - ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), cpu_queue); - a = (fp *)sycl::malloc_shared(sizea * sizeof(fp), cpu_queue); - x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); - y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); - z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + ia = (intType*)sycl::malloc_shared(sizeia * sizeof(intType), cpu_queue); + ja = (intType*)sycl::malloc_shared(sizeja * sizeof(intType), cpu_queue); + a = (fp*)sycl::malloc_shared(sizea * sizeof(fp), cpu_queue); + x = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + y = 
(fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); + z = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), cpu_queue); if (!ia || !ja || !a || !x || !y || !z) { throw std::runtime_error("Failed to allocate USM memory"); @@ -114,10 +114,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { z[i] = set_fp_value(fp(0.0)); } - std::vector int_ptr_vec; + std::vector int_ptr_vec; int_ptr_vec.push_back(ia); int_ptr_vec.push_back(ja); - std::vector fp_ptr_vec; + std::vector fp_ptr_vec; fp_ptr_vec.push_back(a); fp_ptr_vec.push_back(x); fp_ptr_vec.push_back(y); @@ -159,7 +159,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { std::size_t workspace_size = 0; oneapi::mkl::sparse::spmv_buffer_size(cpu_selector, transA, &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); - void *workspace = sycl::malloc_device(workspace_size, cpu_queue); + void* workspace = sycl::malloc_device(workspace_size, cpu_queue); // Optimize spmv auto ev_opt = @@ -188,7 +188,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &cpu_dev) { // Post Processing // - fp *res = y; + fp* res = y; const bool isConj = (transA == oneapi::mkl::transpose::conjtrans); for (intType row = 0; row < nrows; row++) { z[row] *= beta; @@ -254,7 +254,7 @@ void print_example_banner() { // // Main entry point for example // -int main(int /*argc*/, char ** /*argv*/) { +int main(int /*argc*/, char** /*argv*/) { print_example_banner(); try { @@ -269,13 +269,13 @@ int main(int /*argc*/, char ** /*argv*/) { run_sparse_matrix_vector_multiply_example(cpu_dev); std::cout << "Sparse BLAS SPMV USM example ran OK." 
<< std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp index d87297600..69be82745 100644 --- a/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp +++ b/examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp @@ -61,7 +61,7 @@ // is performed and finally the results are post processed. // template -int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { +int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) { // Matrix data size intType size = 4; intType nrows = size * size * size; @@ -72,11 +72,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Catch asynchronous exceptions auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL " "exception during sparse::spmv:\n" << e.what() << std::endl; @@ -95,12 +95,12 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::size_t sizevec = static_cast(nrows); auto sizevec_i64 = static_cast(sizevec); - ia = (intType *)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); - ja = (intType *)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); - a = (fp 
*)sycl::malloc_shared(sizea * sizeof(fp), main_queue); - x = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); - y = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); - z = (fp *)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + ia = (intType*)sycl::malloc_shared(sizeia * sizeof(intType), main_queue); + ja = (intType*)sycl::malloc_shared(sizeja * sizeof(intType), main_queue); + a = (fp*)sycl::malloc_shared(sizea * sizeof(fp), main_queue); + x = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + y = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); + z = (fp*)sycl::malloc_shared(sizevec * sizeof(fp), main_queue); if (!ia || !ja || !a || !x || !y || !z) { throw std::runtime_error("Failed to allocate USM memory"); @@ -115,10 +115,10 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { z[i] = set_fp_value(fp(0.0)); } - std::vector int_ptr_vec; + std::vector int_ptr_vec; int_ptr_vec.push_back(ia); int_ptr_vec.push_back(ja); - std::vector fp_ptr_vec; + std::vector fp_ptr_vec; fp_ptr_vec.push_back(a); fp_ptr_vec.push_back(x); fp_ptr_vec.push_back(y); @@ -160,7 +160,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { std::size_t workspace_size = 0; oneapi::mkl::sparse::spmv_buffer_size(main_queue, transA, &alpha, A_view, A_handle, x_handle, &beta, y_handle, alg, descr, workspace_size); - void *workspace = sycl::malloc_device(workspace_size, main_queue); + void* workspace = sycl::malloc_device(workspace_size, main_queue); // Optimize spmv auto ev_opt = @@ -189,7 +189,7 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device &dev) { // Post Processing // - fp *res = y; + fp* res = y; const bool isConj = (transA == oneapi::mkl::transpose::conjtrans); for (intType row = 0; row < nrows; row++) { z[row] *= beta; @@ -258,7 +258,7 @@ void print_example_banner() { // // Main entry point for example // -int main(int /*argc*/, char ** /*argv*/) { +int main(int /*argc*/, 
char** /*argv*/) { print_example_banner(); try { @@ -279,13 +279,13 @@ int main(int /*argc*/, char ** /*argv*/) { run_sparse_matrix_vector_multiply_example(dev); std::cout << "Sparse BLAS SPMV USM example ran OK." << std::endl; } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cerr << "Caught synchronous SYCL exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; std::cerr << "\tSYCL error code: " << e.code().value() << std::endl; return 1; } - catch (std::exception const &e) { + catch (std::exception const& e) { std::cerr << "Caught std::exception during Sparse SPMV:" << std::endl; std::cerr << "\t" << e.what() << std::endl; return 1; diff --git a/include/oneapi/mkl/bfloat16.hpp b/include/oneapi/mkl/bfloat16.hpp index afa155b1a..127d5ced4 100644 --- a/include/oneapi/mkl/bfloat16.hpp +++ b/include/oneapi/mkl/bfloat16.hpp @@ -70,7 +70,7 @@ struct bfloat16 { inline bfloat16(float f); bfloat16(double d) : bfloat16(float(d)) {} template - bfloat16(T i, typename std::enable_if::value>::type *_ = nullptr) + bfloat16(T i, typename std::enable_if::value>::type* _ = nullptr) : bfloat16(float(i)) {} inline operator float() const; @@ -101,115 +101,115 @@ struct bfloat16 { return h; } - friend float operator+(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator+(const bfloat16& h1, const bfloat16& h2) { return float(h1) + float(h2); } - friend float operator-(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator-(const bfloat16& h1, const bfloat16& h2) { return float(h1) - float(h2); } - friend float operator*(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator*(const bfloat16& h1, const bfloat16& h2) { return float(h1) * float(h2); } - friend float operator/(const bfloat16 &h1, const bfloat16 &h2) { + friend float operator/(const bfloat16& h1, const bfloat16& h2) { return float(h1) / float(h2); } template friend typename std::enable_if::value, float>::type operator+( - 
const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) + float(o); } template friend typename std::enable_if::value, float>::type operator-( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) - float(o); } template friend typename std::enable_if::value, float>::type operator*( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) * float(o); } template friend typename std::enable_if::value, float>::type operator/( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) / float(o); } template friend typename std::enable_if::value, float>::type operator+( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) + float(h); } template friend typename std::enable_if::value, float>::type operator-( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) - float(h); } template friend typename std::enable_if::value, float>::type operator*( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) * float(h); } template friend typename std::enable_if::value, float>::type operator/( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return float(o) / float(h); } template friend typename std::enable_if::value, T>::type operator+( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) + o; } template friend typename std::enable_if::value, T>::type operator-( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) - o; } template friend typename std::enable_if::value, T>::type operator*( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) * o; } template friend typename std::enable_if::value, T>::type operator/( - const bfloat16 &h, const T &o) { + const bfloat16& h, const T& o) { return float(h) / o; } template friend typename std::enable_if::value, 
T>::type operator+( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o + float(h); } template friend typename std::enable_if::value, T>::type operator-( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o - float(h); } template friend typename std::enable_if::value, T>::type operator*( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o * float(h); } template friend typename std::enable_if::value, T>::type operator/( - const T &o, const bfloat16 &h) { + const T& o, const bfloat16& h) { return o / float(h); } template - bfloat16 operator+=(const T &o) { + bfloat16 operator+=(const T& o) { return *this = bfloat16(*this + o); } template - bfloat16 operator-=(const T &o) { + bfloat16 operator-=(const T& o) { return *this = bfloat16(*this - o); } template - bfloat16 operator*=(const T &o) { + bfloat16 operator*=(const T& o) { return *this = bfloat16(*this * o); } template - bfloat16 operator/=(const T &o) { + bfloat16 operator/=(const T& o) { return *this = bfloat16(*this / o); } }; diff --git a/include/oneapi/mkl/blas.hxx b/include/oneapi/mkl/blas.hxx index 374585912..cb89703fc 100644 --- a/include/oneapi/mkl/blas.hxx +++ b/include/oneapi/mkl/blas.hxx @@ -19,1723 +19,1679 @@ // Buffer APIs -static inline void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::asum(get_device_id(queue), queue, n, x, incx, result); } -static inline void axpy(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void 
axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void axpby(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void axpby(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy); } -static inline void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::axpby(get_device_id(queue), queue, n, alpha, x, 
incx, beta, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void copy(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::copy(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { +static inline void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue 
&queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +static inline void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& 
x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result) { detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotc(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +static inline void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result) { detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { 
+static inline void dotu(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + 
std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +static inline void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), 
queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose 
transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } 
-static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t 
stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void 
gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - int8_t ao, sycl::buffer &b, std::int64_t ldb, - uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, 
+ sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - int8_t ao, sycl::buffer &b, std::int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - uint8_t ao, sycl::buffer &b, std::int64_t ldb, - int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t 
k, - float alpha, sycl::buffer &a, std::int64_t lda, - uint8_t ao, sycl::buffer &b, std::int64_t ldb, - uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { +static inline void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co) { detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void 
gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +static inline void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc) { detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline 
void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, - 
std::int64_t stridey, std::int64_t batch_size) { +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, 
alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, +static inline void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, 
std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { +static inline void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -static inline void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, 
float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void geru(sycl::queue &queue, std::int64_t m, 
std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda); } -static inline void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + 
sycl::buffer, 1>& y, std::int64_t incy) { detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& y, std::int64_t incy) { detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +static inline void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +static inline void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - 
std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +static inline void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda) { detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, +static inline void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t 
lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +static inline void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +static inline void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +static inline void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + 
sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +static inline void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a) { +static inline void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a) { detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a) { +static inline void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a) { detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static 
inline void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamax(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& 
x, std::int64_t incx, + sycl::buffer& result) { detail::iamin(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +static inline void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { detail::nrm2(get_device_id(queue), queue, n, x, incx, result); } -static inline void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, - float s) { +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, float c, float s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, - double s) { +static inline void rot(sycl::queue& queue, 
std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, double c, double s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - double c, double s) { +static inline void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, + double s) { detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +static inline void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s) { +static 
inline void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { detail::rotg(get_device_id(queue), queue, a, b, c, s); } -static inline void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +static inline void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param); } -static inline void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +static inline void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param); } -static inline void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +static inline void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param) { detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param); } -static inline void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m) { +static inline void rotmg(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param); } -static inline void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, 
std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -static inline void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); 
} -static inline void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { detail::scal(get_device_id(queue), queue, n, alpha, x, incx); } -static inline void sdsdot(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +static inline void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { detail::sdsdot(get_device_id(queue), queue, n, sb, x, incx, y, incy, result); } -static inline void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, - std::int64_t incy) { +static inline void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -static inline void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, - std::int64_t incy) { +static inline void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, 
x, incx, beta, y, incy); } -static inline void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a) { +static inline void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a) { +static inline void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a); } -static inline void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +static inline void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +static inline void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a); } -static inline void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { 
detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +static inline void swap(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { detail::swap(get_device_id(queue), queue, n, x, incx, y, incy); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc) { +static 
inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +static inline void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +static inline void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + 
std::int64_t incy) { detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +static inline void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -static inline void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda) { +static inline void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, + std::int64_t lda) { detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda) { +static inline void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, + std::int64_t lda) { detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda); } -static inline void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, 
a, lda); } -static inline void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +static inline void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc) { +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc) { +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, - std::int64_t ldc) { +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, - std::int64_t ldc) { +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, 
lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +static inline void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer 
&c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, +static inline void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer 
&a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx) { detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, + std::int64_t incx) { detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -static inline void 
tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +static inline void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, 
m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { 
detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +static inline void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, 
sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsm_batch(sycl::queue &queue, side left_right, 
uplo upper_lower, +static inline void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 
1>& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx) { +static inline void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - 
std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, 
std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t 
stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +static inline 
void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, +static inline void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, 
std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +static inline void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &ab, std::int64_t lda, +static inline void 
imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &ab, std::int64_t lda, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +static inline void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, 
transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, std::int64_t ldc) { +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -static inline void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +static inline void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, sycl::buffer, 1>& c, std::int64_t ldc) { detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); @@ -1743,603 +1699,590 @@ static inline void omatadd(sycl::queue &queue, transpose transa, 
transpose trans // USM APIs -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::asum(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event 
axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpy(get_device_id(queue), queue, n, alpha, x, incx, y, incy, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, - std::int64_t 
*incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - 
std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event 
axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::axpy_batch(get_device_id(queue), queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event 
axpby(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::axpby(get_device_id(queue), queue, n, alpha, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, 
float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::copy(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + 
std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = 
{}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::copy_batch(get_device_id(queue), queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}) { auto done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}) { auto done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double 
*result, - const std::vector &dependencies = {}) { +static inline sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}) { auto done = detail::dot(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotc(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result, 
dependencies); return done; } -static inline sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}) { +static inline sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}) { auto done = detail::dotu(get_device_id(queue), queue, n, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = 
detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gbmv(get_device_id(queue), queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float 
*b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& 
dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, - std::int64_t ldb, sycl::half beta, sycl::half *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, + std::int64_t ldb, sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, 
const sycl::half *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemm(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, + const float** b, std::int64_t* ldb, float* beta, float** c, + 
std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, - double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, + const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch( - sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t 
group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch( - sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, - sycl::half *beta, sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t 
group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) 
{ auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = 
detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, - std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2347,11 +2290,11 @@ static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, trans } static inline sycl::event gemm_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + 
std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2359,226 +2302,219 @@ static inline sycl::event gemm_batch( } static inline sycl::event gemm_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, 
std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, 
std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, std::int32_t *c, std::int64_t ldc, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::gemm_batch(get_device_id(queue), queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, 
upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex 
beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::gemmt(get_device_id(queue), queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline 
sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}) { +static inline sycl::event 
gemm_bias(sycl::queue& queue, transpose transa, transpose transb, + offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}) { auto done = detail::gemm_bias(get_device_id(queue), queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, 
std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::gemv(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const 
float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2586,11 +2522,11 @@ static inline sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::i } static inline sycl::event gemv_batch( - sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}) { + sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t 
incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2598,1538 +2534,1508 @@ static inline sycl::event gemv_batch( } static inline sycl::event gemv_batch( - sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); 
return done; } -static inline sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::gemv_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, 
std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event 
dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, 
left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::dgmm_batch(get_device_id(queue), queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = 
{}) { +static inline sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::ger(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + 
std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::gerc(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::geru(get_device_id(queue), queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hbmv(sycl::queue& 
queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event 
hemm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::hemm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + 
std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hemv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, 
incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::her2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const 
std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::her2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::herk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hpmv(sycl::queue& queue, 
uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::hpmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}) { auto done = 
detail::hpr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}) { +static inline sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}) { auto done = detail::hpr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { 
+static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamax(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = 
detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}) { +static inline sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}) { auto done = detail::iamin(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t 
incx, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}) { +static inline sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}) { auto done = detail::nrm2(get_device_id(queue), queue, n, x, incx, result, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - float c, float s, - const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, + float s, const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + double c, double s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}) { +static inline 
sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}) { +static inline sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}) { auto done = detail::rot(get_device_id(queue), queue, n, x, incx, y, incy, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + float* c, std::complex* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotg(sycl::queue &queue, std::complex *a, - 
std::complex *b, double *c, std::complex *s, - const std::vector &dependencies = {}) { +static inline sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + double* c, std::complex* s, + const std::vector& dependencies = {}) { auto done = detail::rotg(get_device_id(queue), queue, a, b, c, s, dependencies); return done; } -static inline sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float* param, + const std::vector& dependencies = {}) { auto done = detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param, dependencies); return done; } -static inline sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double* param, + const std::vector& dependencies = {}) { auto done = detail::rotm(get_device_id(queue), queue, n, x, incx, y, incy, param, dependencies); return done; } -static inline sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, + float* param, const std::vector& dependencies = {}) { auto done = detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param, dependencies); return done; } -static inline sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, - double y1, double *param, - const std::vector &dependencies = {}) { +static inline sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, + double* param, 
const std::vector& dependencies = {}) { auto done = detail::rotmg(get_device_id(queue), queue, d1, d2, x1, y1, param, dependencies); return done; } -static inline sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::sbmv(get_device_id(queue), queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, 
std::int64_t n, double alpha, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto 
done = detail::scal(get_device_id(queue), queue, n, alpha, x, incx, dependencies); return done; } -static inline sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}) { +static inline sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}) { auto done = detail::sdsdot(get_device_id(queue), queue, n, sb, x, incx, y, incy, result, dependencies); return done; } -static inline sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::spmv(get_device_id(queue), queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, 
float *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}) { auto done = detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* a, + const std::vector& dependencies = {}) { auto done = detail::spr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, dependencies); return done; } -static inline sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies = {}) { auto done = detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -static inline sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}) { +static inline sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies = {}) { auto done = detail::spr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, 
dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::swap(get_device_id(queue), queue, n, x, incx, y, incy, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float 
alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto 
done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::symm(get_device_id(queue), queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}) { +static inline sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, std::int64_t lda, 
const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}) { auto done = detail::symv(get_device_id(queue), queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -static inline sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } -static inline sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event 
syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}) { +static inline sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}) { auto done = detail::syr2(get_device_id(queue), queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, 
alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syr2k(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, 
transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event 
syrk(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::syrk(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, - double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, 
group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, 
- float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex 
alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::syrk_batch(get_device_id(queue), queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, 
transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, 
std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tbsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& 
dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, - std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, + std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::tpsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline 
sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trmm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose 
trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event 
trmv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trmv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex 
*b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::trsm(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = 
detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, 
stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch(sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, 
diag *unit_diag, - std::int64_t *m, std::int64_t *n, double *alpha, - const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch( - sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsm_batch( - sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, diag *unit_diag, - std::int64_t *m, std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - 
std::int64_t *group_size, const std::vector &dependencies = {}) { +static inline sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}) { auto done = detail::trsm_batch(get_device_id(queue), queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}) { +static inline sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}) { auto done = detail::trsv(get_device_id(queue), queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, +static inline sycl::event 
omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - 
std::int64_t n, float alpha, float *ab, std::int64_t lda, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* 
ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy_batch(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, + const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, 
n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4137,185 +4043,185 @@ static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, tr } static inline sycl::event omatadd_batch( - sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}) { + sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex *c, + const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatadd_batch(get_device_id(queue), queue, transa, transb, m, n, 
alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - double *b, std::int64_t ldb, - const std::vector &dependencies = {}) { +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy(sycl::queue& queue, 
transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::omatcopy(get_device_id(queue), queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const 
std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::omatcopy2(get_device_id(queue), queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + 
const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, +static inline sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}) { auto done = detail::imatcopy(get_device_id(queue), queue, trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event 
omatadd(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}) { +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -static inline sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, +static inline sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}) { + const std::complex* a, std::int64_t lda, + std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* 
c, std::int64_t ldc, + const std::vector& dependencies = {}) { auto done = detail::omatadd(get_device_id(queue), queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; diff --git a/include/oneapi/mkl/blas/detail/blas_ct_backends.hxx b/include/oneapi/mkl/blas/detail/blas_ct_backends.hxx index afebb93c3..cd03497d6 100644 --- a/include/oneapi/mkl/blas/detail/blas_ct_backends.hxx +++ b/include/oneapi/mkl/blas/detail/blas_ct_backends.hxx @@ -20,2892 +20,2813 @@ // Buffer APIs static inline void syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, 
std::int64_t incx); + sycl::buffer, 1>& x, std::int64_t incx); static inline void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void trmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, 
std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void spr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); static inline void spr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); static inline void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); static 
inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + double beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); static inline void her2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); static inline void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, 
std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, float c, float s); static inline void rot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s); static inline void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void axpy(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, + std::complex alpha, 
sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); static inline void axpy(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); static inline void axpy_batch(backend_selector selector, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + float alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + double alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static 
inline void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void axpby(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, 
- sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, 
std::int64_t incy); + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); static inline void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + float beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + double beta, sycl::buffer& y, std::int64_t incy, 
std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); static inline void her(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); static inline void her(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); static inline void hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); static inline void hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); + std::int64_t n, double alpha, sycl::buffer, 1>& x, + 
std::int64_t incx, sycl::buffer, 1>& a); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& c, + 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, double 
alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - 
sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); static inline void 
gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void 
herk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t 
ldb); static inline void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); static inline void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> 
&y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tbmv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t 
ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); static inline void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); static inline void syr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); static inline void syr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); static inline void 
trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -static inline void rotmg(backend_selector selector, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, - sycl::buffer ¶m); +static inline void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); -static inline void rotmg(backend_selector selector, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, - sycl::buffer ¶m); +static inline void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param); static inline void tpsv(backend_selector selector, uplo upper_lower, - transpose 
trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tpsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void trsv(backend_selector selector, uplo upper_lower, transpose 
trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, 
sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); static inline void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, 
std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); static inline void sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector 
selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void tbsv(backend_selector selector, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); static inline void spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& 
y, std::int64_t incy, sycl::buffer& a); static inline void spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); + std::int64_t n, double 
alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); static inline void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); -static inline void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); +static inline void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); -static inline void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); +static inline void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + 
sycl::buffer& s); static inline void rotg(backend_selector selector, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); static inline void rotg(backend_selector selector, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); static inline void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); static inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t 
incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void dot(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); static inline void symv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); static inline void symv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t 
stride_b, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); static inline void omatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); + std::int64_t m, 
std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); static inline void omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t 
strideb); static inline void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); static inline void imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); static inline void imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); 
static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); static inline void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs static inline sycl::event syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event syr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - float alpha, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + float alpha, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, 
- double alpha, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + std::complex alpha, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - float alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + float alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event scal(backend_selector selector, std::int64_t n, - double alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + double alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const 
std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline 
sycl::event tpmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event spr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, const std::vector& dependencies = {}); static inline sycl::event spr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, const std::vector& dependencies = {}); static inline sycl::event hpmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hpmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event 
syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event syrk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static 
inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector 
selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + 
std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event syrk_batch(backend_selector selector, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event syrk_batch(backend_selector selector, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event her2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& 
dependencies = {}); static inline sycl::event her2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event hbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); static inline sycl::event 
rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); -static inline sycl::event rot(backend_selector selector, std::int64_t n, - float *x, std::int64_t incx, float *y, std::int64_t incy, float c, - float s, const std::vector &dependencies = {}); +static inline sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); static inline sycl::event rot(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, double c, + double s, const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); + float alpha, const float* x, std::int64_t incx, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); + double alpha, const double* x, std::int64_t incx, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const 
std::vector& dependencies = {}); static inline sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t 
n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event axpy_batch(backend_selector selector, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t 
batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event axpy_batch(backend_selector selector, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + float alpha, const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + double alpha, const double* x, std::int64_t incx, const double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* 
y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gerc(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event gerc(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event 
syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event syr2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, float alpha, 
const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + 
const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event gemv_batch(backend_selector selector, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event gemv_batch(backend_selector selector, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemv_batch( backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); 
static inline sycl::event gemv_batch( backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch(backend_selector selector, - transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemv_batch( - backend_selector selector, transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, 
std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + 
const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const 
double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event dgmm_batch(backend_selector selector, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event 
dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); static inline sycl::event her(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event her(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, 
std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event hpr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t 
*lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, + double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* 
transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, + sycl::half* beta, sycl::half** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, + float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event 
gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, + float* beta, std::int32_t** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); -static inline sycl::event gemm_batch( - backend_selector selector, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, - float *beta, std::int32_t **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_batch(backend_selector selector, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* 
c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemm_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, std::int64_t ldc, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -static inline sycl::event gemm_batch( - backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = 
{}); +static inline sycl::event gemm_batch(backend_selector selector, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::half* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, + std::int64_t 
k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *a, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* a, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event spmv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* a, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, 
std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event geru(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event geru(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const float *x, 
std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event nrm2(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + 
std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); static inline sycl::event gemm(backend_selector selector, 
transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const bfloat16 *a, std::int64_t lda, - const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event herk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event herk(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event ger(backend_selector selector, std::int64_t m, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event ger(backend_selector selector, std::int64_t m, - std::int64_t n, double alpha, const double *x, std::int64_t 
incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trsm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex 
*a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, 
double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event trsm_batch(backend_selector selector, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, - const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, + int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, + int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, + int64_t 
batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, int64_t m, + int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, + int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); static inline sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, 
std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event hemm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event hemm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event hpr2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const 
std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event hpr2(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t 
incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gbmv(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbmv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, 
std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event symm(backend_selector selector, side left_right, - uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); static inline sycl::event syr(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, std::int64_t lda, + const 
std::vector& dependencies = {}); static inline sycl::event syr(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, std::int64_t lda, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event trmm(backend_selector selector, side left_right, - uplo upper_lower, transpose 
trans, diag unit_diag, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -static inline sycl::event rotmg(backend_selector selector, float *d1, - float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); +static inline sycl::event rotmg(backend_selector selector, float* d1, float* d2, + float* x1, float y1, float* param, + const std::vector& dependencies = {}); -static inline sycl::event rotmg(backend_selector selector, double *d1, - double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); +static inline sycl::event rotmg(backend_selector selector, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector 
&dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tpsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event trsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector 
&dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - 
std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -static inline sycl::event copy_batch(backend_selector selector, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* 
group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +static inline sycl::event copy_batch(backend_selector selector, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); static inline sycl::event hemv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, 
std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event hemv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, 
std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -static inline sycl::event gemm_bias( - backend_selector selector, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t *a, int64_t lda, - std::uint8_t ao, const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, - int64_t ldc, const std::int32_t *co, const std::vector &dependencies = {}); - -static inline 
sycl::event gemm_bias(backend_selector selector, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, + std::int8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, + std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::uint8_t* a, int64_t lda, + std::uint8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +static inline sycl::event gemm_bias(backend_selector selector, transpose transa, + transpose transb, offset offsetc, int64_t m, int64_t n, + int64_t k, float alpha, const std::uint8_t* a, int64_t 
lda, + std::uint8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); static inline sycl::event sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event sbmv(backend_selector selector, uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event asum(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); static inline sycl::event 
asum(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); static inline sycl::event tbsv(backend_selector selector, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); 
static inline sycl::event spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); static inline sycl::event spr2(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); static inline sycl::event rotm(backend_selector selector, std::int64_t n, - float *x, 
std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies = {}); + float* x, std::int64_t incx, float* y, std::int64_t incy, + float* param, const std::vector& dependencies = {}); static inline sycl::event rotm(backend_selector selector, std::int64_t n, - double *x, std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies = {}); + double* x, std::int64_t incx, double* y, std::int64_t incy, + double* param, const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, float *a, float *b, - float *c, float *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, float* a, float* b, + float* c, float* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, double *a, - double *b, double *c, double *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, double* a, double* b, + double* c, double* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, - std::complex *a, std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies = {}); -static inline sycl::event rotg(backend_selector selector, - std::complex *a, std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies = {}); +static inline sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies = {}); static inline sycl::event sdsdot(backend_selector selector, std::int64_t n, - float sb, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); + float sb, const 
float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies = {}); static inline sycl::event her2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event her2k(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event dot(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); static inline sycl::event dot(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* result, + const std::vector& dependencies = {}); static inline sycl::event dot(backend_selector selector, 
std::int64_t n, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + double* result, const std::vector& dependencies = {}); static inline sycl::event symv(backend_selector selector, uplo upper_lower, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event symv(backend_selector selector, uplo upper_lower, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t 
batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, + std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, + std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatadd_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex *c, std::int64_t 
ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); static inline sycl::event omatadd_batch( backend_selector selector, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, + std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, 
std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event omatcopy2(backend_selector 
selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, 
float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); static inline sycl::event omatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, diff --git a/include/oneapi/mkl/blas/detail/blas_loader.hxx b/include/oneapi/mkl/blas/detail/blas_loader.hxx index 98d93b2ad..22ef22283 100644 --- a/include/oneapi/mkl/blas/detail/blas_loader.hxx +++ b/include/oneapi/mkl/blas/detail/blas_loader.hxx @@ -19,2632 +19,2489 @@ // Buffer APIs -ONEMKL_EXPORT void 
herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMKL_EXPORT void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx); 
-ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx); -ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx); - -ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx); + +ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t 
incx); +ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); -ONEMKL_EXPORT void 
spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); +ONEMKL_EXPORT void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); +ONEMKL_EXPORT void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& 
c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, + 
std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t 
ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, 
std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + double beta, sycl::buffer& c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size); +ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, 
std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMKL_EXPORT void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + 
std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, +ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); -ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, +ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); -ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); -ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); - -ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, +ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, float c, float s); +ONEMKL_EXPORT void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, 
std::int64_t incx, sycl::buffer& y, + std::int64_t incy, double c, double s); + +ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, +ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy); +ONEMKL_EXPORT void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - 
std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, +ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, double beta, + 
sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMKL_EXPORT void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMKL_EXPORT void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); 
-ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); +ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, 
std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t 
lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + float beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, 
std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); +ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, +ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t 
ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, +ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, +ONEMKL_EXPORT void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); -ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); +ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); 
+ONEMKL_EXPORT void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); -ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); -ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device 
libkey, sycl::queue &queue, transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); +ONEMKL_EXPORT void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - -ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); + +ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void 
iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, - sycl::buffer ¶m); -ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, - sycl::buffer ¶m); - -ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - 
sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); +ONEMKL_EXPORT void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, + sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); + +ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMKL_EXPORT void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + 
+ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); -ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); +ONEMKL_EXPORT void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); - -ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + 
sycl::buffer& result); +ONEMKL_EXPORT void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); + +ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMKL_EXPORT void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, 
sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, 
sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t 
incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); -ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); -ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + +ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); +ONEMKL_EXPORT void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); +ONEMKL_EXPORT void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, 
sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMKL_EXPORT void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); -ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - 
-ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); +ONEMKL_EXPORT void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); -ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); +ONEMKL_EXPORT void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMKL_EXPORT void gbmv(oneapi::mkl::device 
libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMKL_EXPORT void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - 
sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); +ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); -ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); +ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex 
alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); -ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - -ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); -ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + +ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); +ONEMKL_EXPORT void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + +ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); +ONEMKL_EXPORT void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose 
trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMKL_EXPORT void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, 
std::int64_t incy); - -ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); -ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, 
std::int64_t incx); +ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); +ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void 
copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); -ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); +ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); +ONEMKL_EXPORT void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t 
n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); 
-ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - -ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); -ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); -ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); - -ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + 
sycl::buffer& result); +ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + +ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); +ONEMKL_EXPORT void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); +ONEMKL_EXPORT void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); + +ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& x, + std::int64_t incx); +ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); +ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); +ONEMKL_EXPORT void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); -ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - -ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); +ONEMKL_EXPORT void spr2(oneapi::mkl::device libkey, sycl::queue& queue, 
uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); + +ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); +ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); +ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +ONEMKL_EXPORT void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side 
left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); -ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - -ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); -ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); -ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - -ONEMKL_EXPORT void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float sb, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - -ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); +ONEMKL_EXPORT void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); + +ONEMKL_EXPORT void 
dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); +ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); +ONEMKL_EXPORT void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); + +ONEMKL_EXPORT void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result); + +ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMKL_EXPORT void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); -ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); -ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, 
- sycl::buffer, 1> &s); -ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue &queue, - sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - -ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); + +ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); +ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); +ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); +ONEMKL_EXPORT void rotg(oneapi::mkl::device libkey, sycl::queue& queue, + sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); + +ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose 
trans, +ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + 
sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t 
m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); +ONEMKL_EXPORT void omatcopy(oneapi::mkl::device 
libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); +ONEMKL_EXPORT void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, +ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); +ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + 
std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &ab, +ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); +ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +ONEMKL_EXPORT void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue 
&queue, transpose transa, +ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); +ONEMKL_EXPORT void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs -ONEMKL_EXPORT sycl::event 
herk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
trmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies = {}); -ONEMKL_EXPORT 
sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue 
&queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, 
std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const std::complex* a, std::int64_t lda, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const std::complex* a, std::int64_t lda, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, std::complex* x, std::int64_t 
incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const 
std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); 
+ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, 
float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemm_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, + oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemm_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, + 
oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t 
ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, std::int64_t stride_c, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - float beta, std::int32_t *c, std::int64_t ldc, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, 
std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, 
std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo *upper_lower, transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t 
stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float 
s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double alpha, const double *x, - std::int64_t 
incx, const double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, 
std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, 
std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, 
const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const 
float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, + uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device 
libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float* x, std::int64_t incx, float* y, std::int64_t incy, float c, + float s, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, double* y, std::int64_t incy, double c, + double s, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, 
std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + 
std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, + float beta, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, + double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemv_batch( - oneapi::mkl::device libkey, 
sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, - transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); + oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemv_batch( - oneapi::mkl::device libkey, sycl::queue 
&queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, 
std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const 
std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
spmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, - float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, - double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t 
lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose 
transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, - std::int64_t ldb, sycl::half beta, sycl::half *c, - 
std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_bias( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, int64_t lda, - std::int8_t ao, const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, - int64_t ldc, const std::int32_t *co, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, - transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, - const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, 
std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, - std::complex 
alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex *b, - int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, - const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const 
std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, 
- std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, - transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, 
sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const 
std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, - side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, 
std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag 
unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, int64_t group_count, - int64_t *group_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, int64_t 
group_count, - int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const 
float *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, 
const double *x, std::int64_t incx, - double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device 
libkey, sycl::queue &queue, - std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double *param, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float sb, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, - uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, - float *b, float *c, float *s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, - double *b, double *c, double *s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, - std::complex *a, std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, - std::complex *a, std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float* beta, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* 
incx, std::complex* beta, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, + transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, + float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, + double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, + 
std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, std::int64_t lda, 
+ const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const std::complex* x, + std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, + std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& 
queue, uplo upper_lower, + std::int64_t n, float alpha, const float* a, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* a, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1, + float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1, + double* d2, double* x1, double y1, double* param, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, 
+ std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose transa, transpose transb, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, 
std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const bfloat16* a, std::int64_t lda, const bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::int8_t* b, 
int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, + transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::uint8_t* b, + int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, + int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, 
sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, std::complex* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, std::complex* b, + int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& 
dependencies = {}); +ONEMKL_EXPORT sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, 
std::int64_t n, std::int64_t kl, std::int64_t ku, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, 
std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, 
sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const float* a, + std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, const double* a, + std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const 
std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, int64_t group_count, + int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + 
std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t 
incx, float* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, std::int64_t n, std::int64_t k, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + std::int64_t n, double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float* x, 
std::int64_t incx, float* y, std::int64_t incy, + float* param, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double* x, std::int64_t incx, double* y, std::int64_t incy, + double* param, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* result, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + double* result, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float sb, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, 
sycl::queue& queue, float* a, float* b, + float* c, float* s, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b, + double* c, double* s, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, + std::complex* a, std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, + std::complex* a, std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const 
std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t 
n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, + std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, + std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatadd_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, + oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatadd_batch( - oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, 
sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, +ONEMKL_EXPORT sycl::event 
omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = 
{}); -ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, +ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, 
std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + const float* a, std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + const double* a, std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, diff --git a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx b/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx index 9483a66c1..d5678917e 100644 --- a/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/cublas/blas_ct.hxx @@ -18,122 +18,121 @@ **************************************************************************/ void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, 
std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> 
&a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + 
diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -142,10 +141,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t 
stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -153,10 +152,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -165,11 +163,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, 
beta, c, ldc, stride_c, batch_size); @@ -177,9 +174,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -188,9 +185,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -199,9 +196,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t 
stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -210,9 +207,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -220,38 +217,38 @@ void gemm_batch(backend_selector selector, transpose transa, tr } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, 
std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -259,10 +256,9 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, 
std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); @@ -270,9 +266,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -281,249 +276,240 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, 
stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, 
transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { 
oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + 
std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -531,22 +517,21 @@ void gemv_batch(backend_selector selector, transpose trans, std } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, 
batch_size); @@ -554,52 +539,49 @@ void gemv_batch(backend_selector selector, transpose trans, std void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, 
std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -607,277 +589,268 @@ void dgmm_batch(backend_selector selector, side left_right, std } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void 
her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void 
gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, 
offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::cublas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } 
void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, 
sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -885,7 +858,7 @@ void 
trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -893,190 +866,187 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - 
sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) 
{ + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag 
unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex 
alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); @@ -1084,7 +1054,7 @@ void syr(backend_selector selector, uplo upper_lower, std::int6 void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1092,7 +1062,7 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1100,335 +1070,328 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void 
rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), 
upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { 
oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void 
gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t 
lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, 
- std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag 
unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::cublas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, 
std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1437,8 +1400,8 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1447,10 +1410,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, 
std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1458,10 +1420,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1469,72 +1430,69 @@ void trsm_batch(backend_selector selector, side left_right, upl void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - 
sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::cublas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1542,8 +1500,8 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1551,22 +1509,22 @@ void omatcopy_batch(backend_selector selector, 
transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1574,7 +1532,7 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1582,16 +1540,16 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, 
std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1599,9 +1557,9 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1610,9 +1568,9 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1621,10 +1579,10 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::cublas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1632,123 +1590,123 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, 
sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector 
selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::cublas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, 
std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, 
std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1756,253 +1714,250 @@ void omatadd(backend_selector selector, transpose transa, trans // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, 
dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector 
&dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const 
float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, 
std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, 
std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - 
std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2010,11 +1965,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, 
std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2022,11 +1976,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2034,12 +1987,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2047,12 +1999,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2060,28 +2011,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector 
&dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2089,126 +2040,124 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, s } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector 
selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t 
incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, 
std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2216,9 +2165,9 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2226,10 +2175,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, 
std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2237,10 +2186,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2248,61 +2197,61 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, 
std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2310,9 +2259,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2320,10 +2269,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t 
lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2331,11 +2280,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2343,231 +2291,222 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const 
std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, 
transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** 
a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector 
selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, 
std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2575,164 +2514,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ri } sycl::event her(backend_selector selector, uplo upper_lower, 
std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, 
const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* 
lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + 
std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, 
+ std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2740,12 +2677,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2753,12 +2689,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2766,13 +2701,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2780,13 +2714,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2794,12 +2727,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const 
sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2808,10 +2741,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2820,10 +2753,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const 
std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2832,10 +2765,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2843,109 +2776,105 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event 
spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - 
std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* 
x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2953,9 +2882,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, 
double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2963,10 +2892,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2974,10 +2903,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), 
transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2985,10 +2914,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2996,10 +2925,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3007,9 +2935,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector 
&dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3017,11 +2945,11 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3029,11 +2957,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t 
ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3041,11 +2969,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3053,11 +2981,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, 
std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3065,45 +2993,44 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3111,9 +3038,9 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3121,10 +3048,10 @@ sycl::event 
trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3132,10 +3059,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3143,11 +3070,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t 
lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3155,11 +3081,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3167,11 +3092,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { 
auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3179,62 +3104,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, 
group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t 
*n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3242,28 +3162,28 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3271,10 +3191,10 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3282,27 +3202,27 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo 
upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3310,10 +3230,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3321,10 +3240,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } 
sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3332,10 +3251,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3343,45 +3262,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, 
std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3389,9 +3306,9 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3399,10 +3316,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ 
-3410,10 +3327,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3421,43 +3338,43 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t 
lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3465,9 +3382,9 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, 
unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3475,10 +3392,10 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3486,267 +3403,257 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, 
double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { 
+ diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, 
std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event 
copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const 
std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, 
std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector 
selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3754,9 +3661,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float 
alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3764,9 +3671,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3774,182 +3681,176 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event 
asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector 
&dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, 
std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, 
incy, param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3957,64 +3858,63 @@ sycl::event her2k(backend_selector selector, uplo upper_lower, } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, 
dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symv( 
selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4022,10 +3922,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, 
stride_b, batch_size, dependencies); @@ -4034,9 +3934,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4045,9 +3945,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4055,18 +3955,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto 
done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4074,9 +3974,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4084,9 +3984,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4094,10 +3994,10 @@ sycl::event imatcopy_batch(backend_selector selector, 
transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4106,10 +4006,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4118,11 +4018,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const 
std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4131,11 +4031,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4143,115 +4043,115 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t 
lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy2( selector.get_queue(), 
trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, 
float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4259,9 +4159,9 @@ sycl::event omatadd(backend_selector selector, transpose transa } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4270,9 +4170,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4281,9 +4181,9 @@ sycl::event 
omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4316,8 +4216,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4328,8 +4227,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4339,8 +4237,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, 
std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); @@ -4350,8 +4247,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::cublas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx b/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx index 1141eb238..caa75a646 100644 --- a/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx +++ b/include/oneapi/mkl/blas/detail/cublas/onemkl_blas_cublas.hxx @@ -19,2314 +19,2211 @@ // Buffer APIs -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer 
&result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size); - -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, 
+void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, 
std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, 
std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); + +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, 
std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 
1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, + double s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, 
sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, + std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t 
n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - 
std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex 
alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); + +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t 
batch_size); +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); - -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, 
std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size); - -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - 
sycl::buffer, 1> &a, std::int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + 
sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size); -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, 
std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, 
std::int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double 
alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, - std::int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t 
lda); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, 
std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, 
uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, 
sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t 
lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - 
sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float 
beta, sycl::buffer& c, std::int64_t ldc); -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, +void gemm(sycl::queue& queue, transpose 
transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::complex beta, sycl::buffer, 1> &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, 
std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); - -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + 
double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + 
sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 
1> &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb); - -void trsm(sycl::queue &queue, side 
left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, 
std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void 
trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> 
&c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size); +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& 
queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side 
left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + 
sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, 
std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, 
int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void 
imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, 
std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t 
n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void 
imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose 
transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); // USM APIs -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); - -sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const 
std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex 
*y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t 
*incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -sycl::event dot(sycl::queue &queue, std::int64_t n, 
const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); - -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); + +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + 
const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* 
y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* 
incx, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, 
std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); 
-sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + 
std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, - float *c, std::complex *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, - double *c, std::complex *s, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, - double *y, 
std::int64_t incy, double *param, - const std::vector &dependencies = {}); - -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, - float *param, const std::vector &dependencies = {}); - -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, - double *param, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - 
std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, 
std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, float beta, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, double beta, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event 
gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t 
batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector 
&dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, 
std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, 
std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *a, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *a, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo 
upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *a, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - double *a, const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, - 
double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event 
tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const 
std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue 
&queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - 
std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, 
const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double *beta, double **c, std::int64_t *ldc, - 
std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, double *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, 
std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, 
std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag 
*unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, std::int64_t *ldb, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, std::int64_t *ldb, - double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, const 
sycl::half **b, - std::int64_t *ldb, sycl::half *beta, sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, const float *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double 
*c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); + 
+sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); + +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); + +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float* param, + const std::vector& dependencies = {}); + +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, double* param, + const std::vector& dependencies = {}); + +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); + +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + 
const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* 
y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* 
x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float beta, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, 
transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const 
std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, 
std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event 
hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const 
std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + 
std::int64_t incx, const float* y, std::int64_t incy, float* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, 
std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo 
upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + 
std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t 
k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + 
+sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const 
std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); 
+ +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo 
upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, 
uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t 
stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, 
const float** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, + float** c, 
std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t 
ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, sycl::half* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, 
std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, 
std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, + std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, + std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose 
transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const 
std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, 
+sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector 
&dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - 
-sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, 
int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + 
std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, 
int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, diff --git a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx b/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx index 1724bf5c7..38123485e 100644 --- a/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/mklcpu/blas_ct.hxx @@ -20,122 +20,121 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, 
alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, 
std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, 
std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -144,10 +143,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, 
+ sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -155,10 +154,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -167,11 +165,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -179,9 +176,9 @@ void 
gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -190,9 +187,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -201,9 +198,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t 
stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -212,9 +209,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -222,38 +219,38 @@ void gemm_batch(backend_selector selector, transpose transa, tr } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { 
oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -261,10 +258,9 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, 
sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); @@ -272,9 +268,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -283,249 +278,240 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& 
y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, 
std::int64_t incy, float c, float s) { oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + 
sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void 
axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); 
} void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector 
selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t 
stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -533,22 +519,21 @@ void gemv_batch(backend_selector selector, transpose trans, std } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); @@ -556,52 +541,49 @@ void gemv_batch(backend_selector selector, transpose trans, std void gemv_batch(backend_selector selector, 
transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, 
stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -609,277 +591,268 @@ void dgmm_batch(backend_selector selector, side left_right, std } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t 
lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, 
x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, 
std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, 
float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& 
y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer 
&b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, 
sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -887,7 +860,7 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -895,190 +868,187 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + 
sycl::buffer, 1>& result) { oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { 
oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, 
sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { 
oneapi::mkl::blas::mklcpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> 
&c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); @@ -1086,7 +1056,7 @@ void syr(backend_selector selector, uplo upper_lower, std::int6 void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, 
std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1094,7 +1064,7 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1102,335 +1072,328 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer &param) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) {
oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - 
diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, 
std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t 
batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, 
std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, 
double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, 
std::int64_t incx) { oneapi::mkl::blas::mklcpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, 
+ sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) 
{ oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1439,8 +1402,8 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1449,10 +1412,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, 
unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1460,10 +1422,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1471,72 +1432,69 @@ void trsm_batch(backend_selector selector, side left_right, upl void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void 
rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double 
beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklcpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1544,8 +1502,8 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1553,22 +1511,22 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, 
- sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1576,7 +1534,7 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1584,16 +1542,16 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t 
lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1601,9 +1559,9 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1612,9 +1570,9 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t 
ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1623,10 +1581,10 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1634,123 +1592,123 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, 
n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, 
sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, 
std::int64_t ldb) { oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> 
&c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1758,253 +1716,250 @@ void omatadd(backend_selector selector, transpose transa, trans // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector 
&dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, 
incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, 
std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, 
dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { 
+sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1967,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1978,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1989,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 
+2001,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,28 +2013,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2091,126 +2042,124 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, s } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return 
done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); 
return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, 
incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2218,9 +2167,9 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2228,10 +2177,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, 
incy, stridey, batch_size, dependencies); @@ -2239,10 +2188,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2250,61 +2199,61 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + 
std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event 
syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2312,9 +2261,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2322,10 +2271,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2333,11 +2282,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2345,231 +2293,222 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, 
std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + 
std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const 
std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + 
std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, 
dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t 
incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, 
std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, 
group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2577,164 +2516,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ri } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const 
std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), 
n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, 
alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2679,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, 
stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2755,12 +2691,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,13 +2703,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,13 +2716,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2796,12 +2729,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, 
sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2810,10 +2743,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2822,10 +2755,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, 
std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2834,10 +2767,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,109 +2778,105 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, 
double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2955,9 +2884,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + 
std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2965,10 +2894,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2976,10 +2905,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ 
-2987,10 +2916,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2998,10 +2927,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3009,9 +2937,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, 
const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3019,11 +2947,11 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2959,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, 
std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2971,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2983,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, 
std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,45 +2995,44 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, 
incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3113,9 +3040,9 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3123,10 +3050,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector 
selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3134,10 +3061,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3145,11 +3072,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + 
std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3083,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3094,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), 
left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3106,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector 
selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex 
**b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3244,28 +3164,28 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, 
const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3273,10 +3193,10 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3284,27 +3204,27 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const 
std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3312,10 +3232,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3323,10 +3242,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, 
std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3334,10 +3253,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3345,45 +3264,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), 
upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t 
ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3391,9 +3308,9 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3401,10 +3318,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3412,10 +3329,10 @@ sycl::event symm(backend_selector selector, side left_right, up } 
sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3423,43 +3340,43 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t 
incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3467,9 +3384,9 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3477,10 +3394,10 @@ sycl::event 
trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3488,267 +3405,257 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector 
&dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, 
std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, 
std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, 
dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, 
std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, 
std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, 
const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, 
std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3756,9 +3663,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float 
beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3766,9 +3673,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3776,182 +3683,176 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* 
result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + 
std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event 
iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event 
rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return 
done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3959,64 +3860,63 @@ sycl::event her2k(backend_selector selector, uplo upper_lower, } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const 
float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return 
done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3924,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,9 +3936,9 @@ sycl::event 
omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4047,9 +3947,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4057,18 +3957,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch( 
selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4076,9 +3976,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4086,9 +3986,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4096,10 +3996,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, 
transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4108,10 +4008,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4120,11 +4020,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - 
std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4133,11 +4033,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4145,115 +4045,115 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); 
return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { 
auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event 
imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, 
float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4261,9 +4161,9 @@ sycl::event omatadd(backend_selector selector, transpose transa } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4272,9 +4172,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4283,9 +4183,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose 
transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4318,8 +4218,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4330,8 +4229,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4341,8 +4239,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + 
std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); @@ -4352,8 +4249,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklcpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx b/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx index c69257e9c..bfad24ca2 100644 --- a/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/mklgpu/blas_ct.hxx @@ -20,122 +20,121 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::herk(selector.get_queue(), upper_lower, trans, 
n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> 
&x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -144,10 +143,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void 
gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -155,10 +154,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -167,11 +165,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t 
stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -179,9 +176,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -190,9 +187,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, 
@@ -201,9 +198,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -212,9 +209,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -222,38 +219,38 @@ void gemm_batch(backend_selector selector, transpose transa, tr } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { 
oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, 
lda, stride_a, beta, c, ldc, stride_c, @@ -261,10 +258,9 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); @@ -272,9 +268,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -283,249 +278,240 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, 
+ std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, 
y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, 
sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer 
&c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose 
trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -533,22 +519,21 @@ void gemv_batch(backend_selector selector, transpose trans, std } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex 
alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); @@ -556,52 +541,49 @@ void gemv_batch(backend_selector selector, transpose trans, std void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - 
std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -609,277 +591,268 @@ void dgmm_batch(backend_selector selector, side 
left_right, std } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, 
result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, 
std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, 
std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { 
oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t 
n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, 
side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -887,7 +860,7 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -895,190 +868,187 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& 
x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t 
incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + 
std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t 
incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); @@ -1086,7 +1056,7 @@ void syr(backend_selector selector, uplo upper_lower, std::int6 void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1094,7 +1064,7 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1102,335 +1072,328 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, 
std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { 
oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 
1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, 
std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, 
sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), 
upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, 
float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + 
std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::mklgpu::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { 
oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void 
dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1439,8 +1402,8 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1449,10 +1412,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side 
left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1460,10 +1422,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1471,72 +1432,69 @@ void trsm_batch(backend_selector selector, side left_right, upl void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } 
void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& 
a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::mklgpu::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1544,8 +1502,8 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - 
std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1553,22 +1511,22 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1576,7 +1534,7 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t 
ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1584,16 +1542,16 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1601,9 +1559,9 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1612,9 +1570,9 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1623,10 +1581,10 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1634,123 +1592,123 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, 
float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, 
sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } 
void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - 
sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1758,253 +1716,250 @@ void omatadd(backend_selector selector, transpose transa, trans // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, 
dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector 
&dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, 
std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, 
std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + 
std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event 
syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* 
group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1967,10 @@ sycl::event 
syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1978,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1989,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, 
std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 +2001,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,28 +2013,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { 
auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2091,126 +2042,124 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, s } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, 
+ const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - 
float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - 
std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2218,9 +2167,9 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2228,10 +2177,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event 
axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2239,10 +2188,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2250,61 +2199,61 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, 
std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } 
sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2312,9 +2261,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2322,10 +2271,10 @@ sycl::event syr2k(backend_selector selector, uplo 
upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2333,11 +2282,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2345,231 +2293,222 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } 
-sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event 
gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, 
std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t 
*lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + 
std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2577,164 +2516,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ri } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const 
std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, 
std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t 
**a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, 
std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2679,12 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, 
sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2756,10 +2693,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,10 +2705,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float 
beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2780,10 +2717,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2791,12 +2728,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - 
std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2804,12 +2740,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2817,13 +2752,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, 
std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2831,13 +2765,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,109 +2778,105 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const 
float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { 
+sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector 
&dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2955,9 +2884,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2965,10 +2894,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2976,10 +2905,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2987,10 +2916,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2998,10 +2927,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3009,9 +2937,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3019,11 +2947,11 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2959,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event 
gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2971,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2983,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector 
selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,45 +2995,44 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3113,9 +3040,9 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3123,10 +3050,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3134,10 +3061,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3145,11 +3072,10 @@ 
sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3083,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3094,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3106,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - 
const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const 
std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3244,28 +3164,28 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t 
incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3273,10 +3193,10 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3284,27 +3204,27 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - 
std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3312,10 +3232,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double 
*y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3323,10 +3242,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3334,10 +3253,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gbmv(selector.get_queue(), trans, m, n, 
kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3345,45 +3264,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, 
std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3391,9 +3308,9 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3401,10 +3318,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3412,10 +3329,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3423,43 +3340,43 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const 
std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3467,9 +3384,9 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event 
trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3477,10 +3394,10 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3488,267 +3405,257 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trmm(selector.get_queue(), left_right, 
upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - 
std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + 
const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t 
*incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t 
stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const 
std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t 
ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3756,9 +3663,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3766,9 +3673,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3776,182 +3683,176 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); 
return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector 
&dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, 
uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto 
done = oneapi::mkl::blas::mklgpu::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, 
std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3959,64 +3860,63 @@ sycl::event her2k(backend_selector selector, uplo upper_lower, } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const 
std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3924,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event 
omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,9 +3936,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4047,9 +3947,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4057,18 +3957,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4076,9 +3976,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4086,9 +3986,9 @@ sycl::event imatcopy_batch(backend_selector selector, 
transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4096,10 +3996,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4108,10 +4008,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const 
std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4120,11 +4020,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4133,11 +4033,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd_batch( selector.get_queue(), 
transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4145,115 +4045,115 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const 
std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, 
stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return 
done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4261,9 +4161,9 @@ sycl::event omatadd(backend_selector selector, transpose transa } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4272,9 +4172,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4283,9 +4183,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4318,8 +4218,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4330,8 +4229,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* 
ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4341,8 +4239,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); @@ -4352,8 +4249,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::mklgpu::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx b/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx index 404d79ae0..4c94213fb 100644 --- a/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/netlib/blas_ct.hxx @@ -20,122 +20,121 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + std::int64_t n, std::int64_t k, float 
alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, + std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + 
sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, 
sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float 
alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -144,10 +143,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - double beta, sycl::buffer &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -155,10 +154,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -167,11 +165,10 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); @@ -179,9 +176,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -190,9 +187,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, 
transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -201,9 +198,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -212,9 +209,9 @@ void gemm_batch(backend_selector selector, transpose transa, tr void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, @@ -222,38 +219,38 @@ void gemm_batch(backend_selector selector, transpose transa, tr } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::netlib::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -261,10 +258,9 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); @@ -272,9 +268,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), 
upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -283,249 +278,240 @@ void syrk_batch(backend_selector selector, uplo upper_lower, tr void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - 
std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector 
selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { 
oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, 
alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void 
gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { 
oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t 
lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -533,22 +519,21 @@ void gemv_batch(backend_selector selector, transpose trans, std } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t 
stridex, double beta, sycl::buffer& y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); @@ -556,52 +541,49 @@ void gemv_batch(backend_selector selector, transpose trans, std void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, 
sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size) { + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, 
side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -609,277 +591,268 @@ void dgmm_batch(backend_selector selector, side left_right, std } void her(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, 
n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t 
incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + 
sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::netlib::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, - 
sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t 
incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double 
beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - 
std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a, - std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, 
std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -887,7 +860,7 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -895,190 +868,187 @@ void trsm(backend_selector selector, side left_right, uplo uppe void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, 
trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - 
sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, 
std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, 
- sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); @@ -1086,7 +1056,7 @@ void syr(backend_selector selector, uplo upper_lower, std::int6 void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1094,7 +1064,7 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1102,335 +1072,328 @@ void trmm(backend_selector selector, side left_right, uplo uppe void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - 
diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - 
std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector 
selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, - std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { 
oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, - std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, 
sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag 
unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::netlib::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { 
oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { 
oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1439,8 +1402,8 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, 
uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, @@ -1449,10 +1412,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1460,10 +1422,9 @@ void trsm_batch(backend_selector selector, side left_right, upl void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size) { + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, 
ldb, stride_b, batch_size); @@ -1471,72 +1432,69 @@ void trsm_batch(backend_selector selector, side left_right, upl void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + 
sycl::buffer, 1>& s) { oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::netlib::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer 
&a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1544,8 +1502,8 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1553,22 +1511,22 @@ void omatcopy_batch(backend_selector selector, transpose trans, void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void 
imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1576,7 +1534,7 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1584,16 +1542,16 @@ void imatcopy_batch(backend_selector selector, transpose trans, void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { 
oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1601,9 +1559,9 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1612,9 +1570,9 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1623,10 +1581,10 @@ void omatadd_batch(backend_selector selector, transpose transa, void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, 
std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::netlib::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, @@ -1634,123 +1592,123 @@ void omatadd_batch(backend_selector selector, transpose transa, } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 
1>& b, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::netlib::MAJOR::omatcopy2(selector.get_queue(), trans, m, 
n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - 
std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1758,253 +1716,250 @@ void omatadd(backend_selector selector, transpose transa, trans // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, 
const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } -sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(backend_selector selector, std::int64_t n, float alpha, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + 
std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag 
unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag 
unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex 
alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, 
beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, 
n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, 
std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -2012,11 +1967,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_l } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2024,11 +1978,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t 
lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2036,12 +1989,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2049,12 +2001,11 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2062,28 +2013,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_lo } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2091,126 +2042,124 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, s } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - 
std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* 
y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event 
axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const float* x, std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2218,9 +2167,9 @@ sycl::event 
axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const double* x, std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2228,10 +2177,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2239,10 +2188,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2250,61 +2199,61 @@ sycl::event axpy_batch(backend_selector selector, std::int64_t } sycl::event axpby(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, 
std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2312,9 
+2261,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2322,10 +2271,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2333,11 +2282,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const 
std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2345,231 +2293,222 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex 
beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const 
double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - 
std::complex beta, std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + 
std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, 
std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); 
return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, - std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, 
float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, 
const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2577,164 +2516,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ri } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, 
std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector 
selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* 
transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const 
std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2742,12 +2679,11 @@ sycl::event gemm_batch(backend_selector selector, transpose *tr } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2755,12 +2691,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + double alpha, const 
double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2768,13 +2703,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,13 +2716,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t 
batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2796,12 +2729,12 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event gemm_batch(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2810,10 +2743,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, 
std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2822,10 +2755,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2834,10 +2767,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tra sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, 
std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2845,109 +2778,105 @@ sycl::event gemm_batch(backend_selector selector, transpose tra } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t 
incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, 
alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event 
nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2955,9 +2884,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2965,10 +2894,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex 
beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2976,10 +2905,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2987,10 +2916,10 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2998,10 +2927,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3009,9 +2937,9 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3019,11 +2947,11 @@ sycl::event gemm(backend_selector selector, transpose transa, t } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t 
ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3031,11 +2959,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3043,11 +2971,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, 
std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3055,11 +2983,11 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event gemm_bias(backend_selector selector, transpose transa, - transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + transpose transb, offset offsetc, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3067,45 +2995,44 @@ sycl::event gemm_bias(backend_selector selector, transpose tran } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, 
+ std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, 
diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3113,9 +3040,9 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3123,10 +3050,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3134,10 +3061,10 @@ sycl::event trsm(backend_selector 
selector, side left_right, up } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3145,11 +3072,10 @@ sycl::event trsm(backend_selector selector, side left_right, up } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3157,11 +3083,10 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, - const std::vector 
&dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3169,11 +3094,11 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3181,62 +3106,57 @@ sycl::event trsm_batch(backend_selector selector, side left_rig } sycl::event trsm_batch(backend_selector selector, side left_right, - uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, + std::int64_t n, std::complex alpha, 
const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, 
group_size, dependencies); @@ -3244,28 +3164,28 @@ sycl::event trsm_batch(backend_selector selector, side *left_ri } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3273,10 +3193,10 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, 
std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3284,27 +3204,27 @@ sycl::event hemm(backend_selector selector, side left_right, up } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, 
float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3312,10 +3232,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3323,10 +3242,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); 
@@ -3334,10 +3253,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3345,45 +3264,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, st } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } 
sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3391,9 +3308,9 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, - 
std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3401,10 +3318,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3412,10 +3329,10 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symm(selector.get_queue(), left_right, 
upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3423,43 +3340,43 @@ sycl::event symm(backend_selector selector, side left_right, up } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, 
a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3467,9 +3384,9 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3477,10 +3394,10 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, 
upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3488,267 +3405,257 @@ sycl::event trmm(backend_selector selector, side left_right, up } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t 
incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, 
x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event 
copy_batch(backend_selector selector, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, 
std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float 
beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event gemmt(backend_selector selector, uplo upper_lower, - transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3756,9 +3663,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3766,9 +3673,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector 
&dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3776,182 +3683,176 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, s } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::asum(selector.get_queue(), n, x, incx, 
result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, - std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const 
std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { 
+sycl::event rotg(backend_selector selector, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, 
std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3959,64 +3860,63 @@ sycl::event her2k(backend_selector selector, uplo upper_lower, } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double 
*result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return 
done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4024,10 +3924,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4036,10 +3936,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4048,10 +3947,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -4059,19 +3957,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpose } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, - const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, 
alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4079,9 +3976,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4089,9 +3986,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4099,10 +3996,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch( 
selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4111,10 +4008,10 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4123,12 +4020,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4137,12 +4033,11 @@ sycl::event 
omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4150,115 +4045,115 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event 
omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, 
double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, 
double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4266,9 +4161,9 @@ sycl::event omatadd(backend_selector selector, transpose transa } sycl::event omatadd(backend_selector 
selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4277,9 +4172,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4288,9 +4183,9 @@ sycl::event omatadd(backend_selector selector, transpose transa sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, 
a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4323,8 +4218,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4335,8 +4229,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4346,8 +4239,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); @@ -4357,8 +4249,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpose sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const 
std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::netlib::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx b/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx index fbb64a6a0..ef0db5b09 100644 --- a/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx +++ b/include/oneapi/mkl/blas/detail/onemkl_blas_backends.hxx @@ -19,2879 +19,2786 @@ // Buffer APIs -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t 
lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void symm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void symm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); -ONEMKL_EXPORT void symm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); -ONEMKL_EXPORT void symm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - 
std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void hemm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, + std::int64_t ldc); -ONEMKL_EXPORT void hemm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t 
ldc); + sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &c, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &c, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(sycl::queue &queue, 
oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syr2k(sycl::queue &queue, oneapi::mkl::uplo 
upper_lower, +ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, + std::int64_t ldc); -ONEMKL_EXPORT void syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void 
her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, float beta, sycl::buffer, 1> &c, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, float beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, - std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, + std::int64_t ldb, double beta, sycl::buffer, 1>& c, + std::int64_t ldc); -ONEMKL_EXPORT void trmm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); -ONEMKL_EXPORT void trmm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); -ONEMKL_EXPORT void trmm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, 
oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void trmm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void trsm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); -ONEMKL_EXPORT void trsm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb); -ONEMKL_EXPORT void trsm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 
1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void trsm(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t 
incy); -ONEMKL_EXPORT void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &x, +ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size); +ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, +ONEMKL_EXPORT void dgmm_batch(sycl::queue& 
queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, +ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, +ONEMKL_EXPORT void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -ONEMKL_EXPORT void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy); -ONEMKL_EXPORT void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void 
gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy); -ONEMKL_EXPORT void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -ONEMKL_EXPORT void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, 
sycl::buffer, 1> &a, + sycl::buffer, 1>& y, std::int64_t incy); + +ONEMKL_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +ONEMKL_EXPORT void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +ONEMKL_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +ONEMKL_EXPORT void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +ONEMKL_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +ONEMKL_EXPORT void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void 
hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, +ONEMKL_EXPORT void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, +ONEMKL_EXPORT void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +ONEMKL_EXPORT void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, +ONEMKL_EXPORT void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t 
n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); +ONEMKL_EXPORT void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, - std::int64_t lda); +ONEMKL_EXPORT void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +ONEMKL_EXPORT void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, +ONEMKL_EXPORT void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); -ONEMKL_EXPORT void hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMKL_EXPORT void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, 
sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMKL_EXPORT void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - -ONEMKL_EXPORT void hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - -ONEMKL_EXPORT void hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - -ONEMKL_EXPORT void hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - -ONEMKL_EXPORT void sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - -ONEMKL_EXPORT void sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - -ONEMKL_EXPORT void symv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, 
std::int64_t incy); - -ONEMKL_EXPORT void symv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - -ONEMKL_EXPORT void syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - -ONEMKL_EXPORT void spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, +ONEMKL_EXPORT void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + +ONEMKL_EXPORT void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + +ONEMKL_EXPORT void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a); + +ONEMKL_EXPORT void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, 
+ std::int64_t incy, sycl::buffer, 1>& a); + +ONEMKL_EXPORT void sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + +ONEMKL_EXPORT void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMKL_EXPORT void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + +ONEMKL_EXPORT void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMKL_EXPORT void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a, + std::int64_t lda); + +ONEMKL_EXPORT void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, float beta, 
sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, + std::int64_t incy); -ONEMKL_EXPORT void spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); +ONEMKL_EXPORT void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); -ONEMKL_EXPORT void spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); +ONEMKL_EXPORT void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); -ONEMKL_EXPORT void spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); +ONEMKL_EXPORT void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); -ONEMKL_EXPORT void spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); +ONEMKL_EXPORT void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& a); -ONEMKL_EXPORT void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, 
+ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, 
std::int64_t incx); -ONEMKL_EXPORT void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, - std::int64_t incx); + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void 
tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, - std::int64_t incx); + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, - 
sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -ONEMKL_EXPORT void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -ONEMKL_EXPORT void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& x, + std::int64_t incx); -ONEMKL_EXPORT void trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx); -ONEMKL_EXPORT void trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +ONEMKL_EXPORT void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result); -ONEMKL_EXPORT void dotc(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +ONEMKL_EXPORT void dotc(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -ONEMKL_EXPORT void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t 
incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +ONEMKL_EXPORT void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& result); -ONEMKL_EXPORT void dotu(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +ONEMKL_EXPORT void dotu(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -ONEMKL_EXPORT void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void iamax(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void iamax(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void 
iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void iamin(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void iamin(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void asum(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void axpy(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void axpy(sycl::queue &queue, std::int64_t n, double alpha, - 
sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy); -ONEMKL_EXPORT void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, - std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void 
axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void axpby(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, + std::int64_t incy); -ONEMKL_EXPORT void axpby(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, +ONEMKL_EXPORT void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); 
-ONEMKL_EXPORT void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMKL_EXPORT void copy(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void copy(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size); +ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, +ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> 
&x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void copy_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &y, +ONEMKL_EXPORT void copy_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -ONEMKL_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -ONEMKL_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -ONEMKL_EXPORT void sdsdot(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMKL_EXPORT void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -ONEMKL_EXPORT void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +ONEMKL_EXPORT void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -ONEMKL_EXPORT void 
nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); +ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); -ONEMKL_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); +ONEMKL_EXPORT void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); -ONEMKL_EXPORT void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, - float s); +ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, float c, float s); -ONEMKL_EXPORT void rot(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, - double s); +ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, double c, double s); -ONEMKL_EXPORT void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, +ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s); -ONEMKL_EXPORT void rot(sycl::queue &queue, std::int64_t n, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - double c, double s); +ONEMKL_EXPORT void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, + double s); -ONEMKL_EXPORT void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); +ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -ONEMKL_EXPORT void rotg(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); +ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -ONEMKL_EXPORT void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -ONEMKL_EXPORT void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); +ONEMKL_EXPORT void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -ONEMKL_EXPORT void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); +ONEMKL_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); -ONEMKL_EXPORT void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); +ONEMKL_EXPORT void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); -ONEMKL_EXPORT void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer 
¶m); +ONEMKL_EXPORT void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -ONEMKL_EXPORT void rotmg(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m); +ONEMKL_EXPORT void rotmg(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); +ONEMKL_EXPORT void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); -ONEMKL_EXPORT void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void 
swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -ONEMKL_EXPORT void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy); -ONEMKL_EXPORT void swap(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +ONEMKL_EXPORT void swap(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, 
std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, double beta, sycl::buffer &c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, + std::int64_t k, 
sycl::half alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, + sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, + 
sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t 
batch_size); -ONEMKL_EXPORT void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +ONEMKL_EXPORT void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, 
std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +ONEMKL_EXPORT void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -ONEMKL_EXPORT void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, 
float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -ONEMKL_EXPORT void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -ONEMKL_EXPORT void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -ONEMKL_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, +ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, +ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose 
trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, +ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, +ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void 
imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); 
-ONEMKL_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -ONEMKL_EXPORT void omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); +ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); +ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, 
sycl::buffer& b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -ONEMKL_EXPORT void omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, +ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer &b, +ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t 
stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -ONEMKL_EXPORT void imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, +ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, +ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT void imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -ONEMKL_EXPORT void omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, 
+ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); -ONEMKL_EXPORT void omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT void omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - 
oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - 
const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, - const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, 
- const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hemm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hemm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, 
std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, double 
*beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector 
&dependencies = {}); - -ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, - std::complex *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); 
- -ONEMKL_EXPORT sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, 
std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, + const 
std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::int8_t* a, std::int64_t lda, std::int8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + 
float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = 
{}); + +ONEMKL_EXPORT sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, + 
std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + 
oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, - oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT 
sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, - oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsm_batch( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const 
std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, - float beta, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, + std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + float alpha, 
const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, 
oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, + 
oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + oneapi::mkl::uplo upper_lower, 
oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event 
gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, - double beta, double *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event 
gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue &queue, 
oneapi::mkl::transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, - const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float* beta, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, + const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + 
const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + std::int64_t m, std::int64_t n, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemv_batch( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - std::int64_t m, std::int64_t n, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + std::int64_t m, std::int64_t n, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, + std::int64_t ldc, std::int64_t 
stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, + std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, + std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, + double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + std::int64_t* m, std::int64_t* n, + const 
std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double* y, + std::int64_t incy, 
double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, 
std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, + 
std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, 
const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + std::int64_t lda, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const 
std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT 
sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + 
oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, 
std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + 
std::complex* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event 
asum(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, + const 
std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - std::int64_t m, std::int64_t n, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, - std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, - std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, - std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, - double **c, std::int64_t *ldc, 
std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, const std::complex *a, 
- std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, 
- std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, + double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - 
const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, + std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + 
+ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - 
std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t stridex, double* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, - std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const std::complex *x, - std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - 
std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event symv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies = 
{}); - -ONEMKL_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *a, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - double *a, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT 
sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, - std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies 
= {}); - -ONEMKL_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const 
float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, 
std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotc(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dotu(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamax(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); - 
-ONEMKL_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event iamin(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, 
std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, - std::complex *alpha, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event copy_batch(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, - double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector 
&dependencies = {}); - -ONEMKL_EXPORT sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, - std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event axpby(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event 
copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - 
std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event copy_batch(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, - std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event nrm2(sycl::queue &queue, 
std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - float c, float s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - double c, double s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, - double s, const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotg(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const 
std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, - double y1, double *param, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, - std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - 
oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, 
sycl::half *alpha, - const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float *alpha, - const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, - std::int32_t **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event 
gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double beta, double *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, float c, + float s, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + double c, double s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = 
{}); + +ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + float* c, std::complex* s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, + double* c, std::complex* s, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, float* param, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, double* param, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, + float* param, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, + double* param, const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, + std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event 
scal(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, + float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, + double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + 
std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, sycl::half* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, + std::int64_t* n, 
std::int64_t* k, float* alpha, + const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, + std::int32_t** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event 
gemm_batch( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, + oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, sycl::half beta, + sycl::half* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const 
sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, - std::int64_t ldb, std::int64_t stride_b, float beta, float *c, + std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, + std::int64_t ldb, std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, + std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemmt(sycl::queue &queue, 
oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, - std::int64_t ldc, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, 
+ oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t n, std::int64_t k, std::complex alpha, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, + std::int64_t ldc, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, + const float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, + const double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - 
const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, +ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, +ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, 
std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, + std::int64_t n, float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, + const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t 
n, double alpha, const double *a, + std::int64_t n, double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, + const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatadd_batch( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatadd_batch( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, std::complex *c, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const 
std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - double *b, std::int64_t ldb, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + double* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT sycl::event omatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event 
omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, float *b, +ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, +ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT sycl::event omatcopy2(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, 
std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event imatcopy(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +ONEMKL_EXPORT sycl::event imatcopy(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, float beta, - const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + float alpha, const float* a, std::int64_t lda, float beta, + const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, double beta, - const double *b, std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); + double alpha, const double* a, std::int64_t lda, double beta, + const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event omatadd(sycl::queue &queue, oneapi::mkl::transpose transa, +ONEMKL_EXPORT sycl::event omatadd(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, 
std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, const float** a, diff --git a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx b/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx index 8a66ed707..6f56157ba 100644 --- a/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/portblas/blas_ct.hxx @@ -20,123 +20,123 @@ // Buffer APIs void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { + sycl::buffer& x, std::int64_t incx) { 
oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, 
sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -145,9 +145,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t 
ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -156,9 +156,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -167,9 +167,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -178,9 +178,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose 
transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -189,9 +189,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -200,9 +200,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, 
std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -211,9 +211,9 @@ void gemm_batch(backend_selector selector, transpose transa, void gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -221,38 +221,38 @@ void gemm_batch(backend_selector selector, transpose transa, } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose 
trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -260,8 +260,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, 
alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -270,8 +270,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -280,8 +280,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -289,236 +289,236 @@ void syrk_batch(backend_selector selector, uplo upper_lower, } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, 
sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double 
c, double s) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 
1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) 
{ oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void sdsdot(backend_selector selector, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose 
trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void 
gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, @@ -526,9 +526,9 @@ void gemv_batch(backend_selector selector, transpose trans, s } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - 
std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, @@ -536,10 +536,10 @@ void gemv_batch(backend_selector selector, transpose trans, s } void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, @@ -548,9 +548,9 @@ void gemv_batch(backend_selector selector, transpose trans, s void gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, 
stridex, beta, y, incy, @@ -558,9 +558,9 @@ void gemv_batch(backend_selector selector, transpose trans, s } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -568,9 +568,9 @@ void dgmm_batch(backend_selector selector, side left_right, s } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -578,9 +578,9 @@ void dgmm_batch(backend_selector selector, side left_right, s } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t 
stridec, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -588,9 +588,9 @@ void dgmm_batch(backend_selector selector, side left_right, s } void dgmm_batch(backend_selector selector, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -598,90 +598,90 @@ void dgmm_batch(backend_selector selector, side left_right, s } void her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { 
oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { 
oneapi::mkl::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { 
oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -689,9 +689,9 @@ void gemm_bias(backend_selector selector, transpose transa, t void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -699,9 +699,9 @@ void gemm_bias(backend_selector selector, transpose transa, t void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -709,163 +709,163 @@ void gemm_bias(backend_selector selector, transpose transa, t void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { 
+ sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { oneapi::mkl::blas::portblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) 
{ oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, 
std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, 
b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { 
oneapi::mkl::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -873,7 +873,7 @@ void trsm(backend_selector selector, side left_right, uplo up void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, 
std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -881,187 +881,187 @@ void trsm(backend_selector selector, side left_right, uplo up void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& 
result) { oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { 
oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - 
sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { 
oneapi::mkl::blas::portblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t 
ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); @@ -1069,7 +1069,7 @@ void syr(backend_selector selector, uplo upper_lower, std::in void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + 
sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1077,7 +1077,7 @@ void trmm(backend_selector selector, side left_right, uplo up void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); @@ -1085,328 +1085,328 @@ void trmm(backend_selector selector, side left_right, uplo up void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + 
sycl::buffer& param) { oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, std::int64_t n, sycl::buffer &x, - 
std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 
1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - 
sycl::buffer &c, std::int64_t ldc) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, 
a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, 
std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { oneapi::mkl::blas::portblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void 
spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void 
rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(backend_selector selector, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void 
trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1415,8 +1415,8 @@ void trsm_batch(backend_selector selector, side left_right, u void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1425,8 +1425,8 @@ void trsm_batch(backend_selector selector, side left_right, u void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1435,8 +1435,8 @@ void trsm_batch(backend_selector selector, side left_right, u void trsm_batch(backend_selector selector, side 
left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1445,69 +1445,69 @@ void trsm_batch(backend_selector selector, side left_right, u void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector 
selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { oneapi::mkl::blas::portblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, 
std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1515,8 +1515,8 @@ void omatcopy_batch(backend_selector selector, transpose tran void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1524,22 +1524,22 @@ void omatcopy_batch(backend_selector selector, transpose tran void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { 
oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1547,7 +1547,7 @@ void imatcopy_batch(backend_selector selector, transpose tran void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1555,16 +1555,16 @@ void imatcopy_batch(backend_selector selector, transpose tran void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector 
selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1572,9 +1572,9 @@ void omatadd_batch(backend_selector selector, transpose trans } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1583,9 +1583,9 @@ void omatadd_batch(backend_selector selector, transpose trans void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1594,10 +1594,10 @@ void omatadd_batch(backend_selector selector, transpose trans void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::portblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1605,123 +1605,123 @@ void omatadd_batch(backend_selector selector, transpose trans } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, 
sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, 
alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::portblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, 
sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), 
transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1729,185 +1729,185 @@ void omatadd(backend_selector selector, transpose transa, tra // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, 
std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, 
double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, 
double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hpmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo 
upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; @@ -1915,9 +1915,9 @@ sycl::event syrk(backend_selector selector, uplo upper_lower, sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; @@ -1925,54 +1925,54 @@ sycl::event syrk(backend_selector selector, uplo upper_lower, sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float 
*beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* 
beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -1980,10 +1980,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1992,9 +1992,9 @@ sycl::event 
syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2003,10 +2003,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2015,10 +2015,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2026,28 +2026,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_ } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her2( selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex 
beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2055,10 +2055,10 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, } sycl::event hbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -2066,114 +2066,114 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, } sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event rot(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, float *x, - 
std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + 
std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, float *alpha, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, float* alpha, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, double *alpha, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, double* alpha, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event 
axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2181,9 +2181,9 @@ sycl::event axpy_batch(backend_selector selector, std::int64_ } sycl::event 
axpy_batch(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2191,10 +2191,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_ } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2202,10 +2202,10 @@ sycl::event axpy_batch(backend_selector selector, std::int64_ } sycl::event axpy_batch(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2213,61 +2213,61 @@ sycl::event axpy_batch(backend_selector selector, std::int64_ } sycl::event axpby(backend_selector selector, 
std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, 
std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2275,9 +2275,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, 
std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2286,9 +2286,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2297,9 +2297,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2307,49 +2307,49 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower } sycl::event 
gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - 
std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2357,11 +2357,11 @@ sycl::event gemv_batch(backend_selector selector, transpose t } sycl::event gemv_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2370,11 +2370,11 @@ sycl::event gemv_batch(backend_selector selector, transpose t sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2383,58 +2383,58 @@ sycl::event gemv_batch(backend_selector selector, transpose t sycl::event gemv_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } 
-sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t 
group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); @@ -2442,10 +2442,10 @@ sycl::event gemv_batch(backend_selector selector, transpose * } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, const 
float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2453,10 +2453,10 @@ sycl::event dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2464,11 +2464,11 @@ sycl::event dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( 
selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2476,55 +2476,55 @@ sycl::event dgmm_batch(backend_selector selector, side left_r } sycl::event dgmm_batch(backend_selector selector, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2532,162 +2532,162 @@ sycl::event dgmm_batch(backend_selector selector, side *left_ } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t 
*lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float 
**c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const 
std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* 
transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2696,11 +2696,11 @@ sycl::event gemm_batch(backend_selector selector, transpose * sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, std::int64_t 
lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2709,10 +2709,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2721,10 +2721,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const 
std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2733,10 +2733,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2745,10 +2745,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2757,10 +2757,10 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2769,11 +2769,11 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto 
done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2782,11 +2782,11 @@ sycl::event gemm_batch(backend_selector selector, transpose t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2794,105 +2794,105 @@ sycl::event gemm_batch(backend_selector selector, transpose t } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { + double alpha, const 
double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spmv( selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event swap(backend_selector selector, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event 
geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const float *x, - std::int64_t 
incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2900,9 +2900,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) 
{ auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2911,9 +2911,9 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2922,9 +2922,9 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2933,9 +2933,9 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const 
std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2943,9 +2943,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2953,9 +2953,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2964,10 +2964,10 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm_bias(backend_selector selector, 
transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2976,10 +2976,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2988,10 +2988,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, 
const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3000,10 +3000,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tr sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -3011,35 +3011,35 @@ sycl::event gemm_bias(backend_selector selector, transpose tr } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + 
const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; @@ -3047,8 +3047,8 @@ sycl::event ger(backend_selector selector, std::int64_t m, st sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, 
transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3057,8 +3057,8 @@ sycl::event trsm(backend_selector selector, side left_right, sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3067,9 +3067,9 @@ sycl::event trsm(backend_selector selector, side left_right, sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3078,9 +3078,9 @@ sycl::event trsm(backend_selector selector, side left_right, sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t 
lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3089,9 +3089,9 @@ sycl::event trsm(backend_selector selector, side left_right, sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3100,9 +3100,9 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, 
stride_b, batch_size, dependencies); @@ -3111,10 +3111,10 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3123,56 +3123,56 @@ sycl::event trsm_batch(backend_selector selector, side left_r sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { 
+sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* 
lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3180,18 +3180,18 @@ sycl::event trsm_batch(backend_selector selector, side *left_ } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const 
std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; @@ -3199,9 +3199,9 @@ sycl::event dotu(backend_selector selector, std::int64_t n, sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3210,9 +3210,9 @@ sycl::event hemm(backend_selector selector, side left_right, sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3220,27 +3220,27 @@ sycl::event hemm(backend_selector selector, side left_right, } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const 
std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3248,9 +3248,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, } sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { + 
std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3259,9 +3259,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3270,9 +3270,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, sycl::event gbmv(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3280,43 +3280,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, } sycl::event tbmv(backend_selector selector, uplo upper_lower, 
transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) 
{ auto done = oneapi::mkl::blas::portblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3324,9 +3324,9 @@ sycl::event symm(backend_selector selector, side left_right, } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3335,9 +3335,9 @@ sycl::event symm(backend_selector selector, side left_right, sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex 
beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3346,9 +3346,9 @@ sycl::event symm(backend_selector selector, side left_right, sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3356,34 +3356,34 @@ sycl::event symm(backend_selector selector, side left_right, } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, 
dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; @@ -3391,8 +3391,8 @@ sycl::event syr(backend_selector selector, uplo upper_lower, sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3401,8 +3401,8 @@ sycl::event trmm(backend_selector selector, side left_right, sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3411,9 +3411,9 @@ sycl::event trmm(backend_selector selector, side left_right, sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3422,225 +3422,225 @@ sycl::event trmm(backend_selector selector, side left_right, sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, double *x1, - double y1, 
double *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const 
std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const 
std::vector &dependencies) { + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } 
-sycl::event copy_batch(backend_selector selector, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, +sycl::event copy_batch(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector 
&dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, 
std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { + transpose transb, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3648,9 +3648,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { + transpose transb, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3659,9 +3659,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - 
std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3670,9 +3670,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3680,9 +3680,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower } sycl::event sbmv(backend_selector selector, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3690,9 +3690,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector 
selector, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3700,166 +3700,166 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event asum(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event 
asum(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - 
diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, const std::vector &dependencies) { + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, std::int64_t n, const double 
*x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotm(backend_selector selector, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } sycl::event sdsdot(backend_selector selector, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; @@ -3867,9 +3867,9 @@ sycl::event sdsdot(backend_selector selector, std::int64_t n, sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3878,62 +3878,62 @@ sycl::event her2k(backend_selector selector, uplo upper_lower sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, 
const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3941,10 +3941,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpo } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3953,9 +3953,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpo sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const 
std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3964,9 +3964,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpo sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3974,18 +3974,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpo } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t 
stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3993,9 +3993,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4003,9 +4003,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -4013,10 +4013,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpo sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t 
stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4025,10 +4025,10 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4037,11 +4037,11 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4050,11 +4050,11 @@ sycl::event omatadd_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -4062,115 +4062,115 @@ sycl::event omatadd_batch(backend_selector selector, transpos } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + 
std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, 
std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4178,9 +4178,9 @@ sycl::event omatadd(backend_selector selector, transpose tran } sycl::event omatadd(backend_selector selector, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4189,9 +4189,9 @@ sycl::event omatadd(backend_selector selector, transpose tran sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4200,95 +4200,95 @@ sycl::event omatadd(backend_selector selector, transpose tran sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { 
+ const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t 
*lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event omatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( 
selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); return done; } -sycl::event imatcopy_batch(backend_selector selector, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(backend_selector selector, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, 
std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::portblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx b/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx index bc86929b0..7410315d2 100644 --- a/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx +++ b/include/oneapi/mkl/blas/detail/rocblas/blas_ct.hxx @@ -20,119 +20,119 @@ **************************************************************************/ void herk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc) { + int64_t k, float alpha, sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void herk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, sycl::buffer, 1> &a, int64_t lda, - double beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t k, double alpha, sycl::buffer, 1>& a, int64_t lda, + double beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::herk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void scal(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx) { + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx) { + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> 
&x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void scal(backend_selector selector, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx) { + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trmv(backend_selector 
selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void spr(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { + sycl::buffer& x, int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, 
alpha, x, incx, a); } void spr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { + sycl::buffer& x, int64_t incx, sycl::buffer& a) { oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -140,9 +140,9 @@ void gemm_batch(backend_selector selector, transpose transa, t } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, double beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -151,9 +151,9 @@ void gemm_batch(backend_selector selector, transpose transa, t void gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - 
std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -162,9 +162,9 @@ void gemm_batch(backend_selector selector, transpose transa, t void gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -172,9 +172,9 @@ void gemm_batch(backend_selector selector, transpose transa, t } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, sycl::half beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -182,9 +182,9 @@ void gemm_batch(backend_selector selector, transpose transa, t } void gemm_batch(backend_selector selector, 
transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -192,9 +192,9 @@ void gemm_batch(backend_selector selector, transpose transa, t } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -202,9 +202,9 @@ void gemm_batch(backend_selector selector, transpose transa, t } void gemm_batch(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, - int64_t stride_b, float beta, sycl::buffer &c, int64_t ldc, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, + int64_t stride_b, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemm_batch(selector.get_queue(), transa, transb, m, n, 
k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, @@ -212,38 +212,38 @@ void gemm_batch(backend_selector selector, transpose transa, t } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &c, int64_t ldc) { + int64_t k, float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &c, int64_t ldc) { + int64_t k, double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, std::complex beta, sycl::buffer, 1> &c, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, std::complex beta, sycl::buffer, 1> &c, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syrk(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, float 
beta, sycl::buffer &c, int64_t ldc, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -251,8 +251,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, t } void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, double beta, sycl::buffer &c, int64_t ldc, + int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -261,8 +261,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, t void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -271,8 +271,8 @@ void syrk_batch(backend_selector selector, uplo upper_lower, t void syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, 
int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::syrk_batch(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, @@ -280,247 +280,247 @@ void syrk_batch(backend_selector selector, uplo upper_lower, t } void her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void hbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void hbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 
1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void rot(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s) { oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void rot(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s) { oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, float c, float s) { +void rot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, float c, float s) { oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } -void rot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, double c, double s) { +void rot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, double c, double s) { oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s); } void axpy(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { 
oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy); } void axpy_batch(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, int64_t stridex, sycl::buffer &y, + sycl::buffer& x, int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpy_batch(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void 
axpy_batch(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } void axpby(backend_selector selector, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void axpby(backend_selector selector, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy); } void sdsdot(backend_selector selector, int64_t n, float sb, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) 
{ oneapi::mkl::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result); } void gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t 
k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2k(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t 
incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, float beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + float alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, float beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, double beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { + double alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, double beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { 
oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -528,10 +528,10 @@ void gemv_batch(backend_selector selector, transpose trans, in } void gemv_batch(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::gemv_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, @@ -539,8 +539,8 @@ void gemv_batch(backend_selector selector, transpose trans, in } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { 
oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -548,18 +548,18 @@ void dgmm_batch(backend_selector selector, side left_right, in } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, @@ -567,9 +567,9 @@ void dgmm_batch(backend_selector selector, side left_right, in } void dgmm_batch(backend_selector selector, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch(selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, 
stridec, @@ -577,88 +577,88 @@ void dgmm_batch(backend_selector selector, side left_right, in } void her(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void her(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void hpr(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } void hpr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a) { oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a); } -void iamin(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } -void iamin(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamin(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector 
selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void iamin(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result); } void hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy) { + sycl::buffer& a, sycl::buffer& x, int64_t incx, float beta, + sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void spmv(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { + sycl::buffer& a, 
sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy); } void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, int8_t ao, sycl::buffer &b, - int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, int8_t ao, sycl::buffer& b, + int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -666,9 +666,9 @@ void gemm_bias(backend_selector selector, transpose transa, tr void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, int8_t ao, sycl::buffer &b, - int64_t ldb, int8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, int8_t ao, sycl::buffer& b, + int64_t ldb, int8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -676,9 +676,9 @@ void gemm_bias(backend_selector selector, transpose transa, tr void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, uint8_t ao, sycl::buffer &b, - int64_t ldb, int8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, uint8_t ao, sycl::buffer& b, + int64_t ldb, int8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { 
oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); @@ -686,83 +686,83 @@ void gemm_bias(backend_selector selector, transpose transa, tr void gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, uint8_t ao, sycl::buffer &b, - int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { + sycl::buffer& a, int64_t lda, uint8_t ao, sycl::buffer& b, + int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, int64_t ldc, + sycl::buffer& co) { oneapi::mkl::blas::rocblas::MAJOR::gemm_bias(selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void swap(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void swap(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } -void swap(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void swap(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void swap(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { 
oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy); } void geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void nrm2(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void nrm2(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } -void nrm2(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void nrm2(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { 
oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -770,161 +770,161 @@ void gemm(backend_selector selector, transpose transa, transpo void gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, 
std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc) { + int64_t m, int64_t n, int64_t k, sycl::half alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, + int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void syr2(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { 
oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void syr2(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, int64_t m, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void ger(backend_selector selector, int64_t m, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, 
upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void dotu(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void dotu(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result); } void hemm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, 
sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hemm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a) { + std::complex alpha, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a) { oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, + int64_t kl, int64_t ku, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, 
y, incy); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, + int64_t kl, int64_t ku, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -932,433 +932,433 @@ void gbmv(backend_selector selector, transpose trans, int64_t void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void 
tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbmv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { + int64_t n, float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, + int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t 
ldb, double beta, sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void symm(backend_selector selector, side left_right, uplo upper_lower, int64_t m, - int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } void dotc(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void dotc(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result); } void syr(backend_selector selector, uplo 
upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void syr(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } void trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, 
int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } -void rotmg(backend_selector selector, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(backend_selector selector, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, sycl::buffer &x, + diag unit_diag, int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void tpsv(backend_selector 
selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } void trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::trsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void copy(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void 
copy(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +void copy(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } void copy(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy); } -void copy_batch(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, int64_t 
n, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void copy_batch(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::copy_batch(selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size); } void hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::hemv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, float beta, - sycl::buffer &c, int64_t ldc) { + 
transpose transb, int64_t n, int64_t k, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, float beta, + sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, double beta, - sycl::buffer &c, int64_t ldc) { + transpose transb, int64_t n, int64_t k, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, double beta, + sycl::buffer& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void asum(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer 
&result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void asum(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } -void asum(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void asum(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result); } void sbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void sbmv(backend_selector selector, uplo upper_lower, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t 
n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx) { + diag unit_diag, int64_t n, int64_t k, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx) { oneapi::mkl::blas::rocblas::MAJOR::tbsv(selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } void spr2(backend_selector selector, uplo upper_lower, int64_t n, float alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } void spr2(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &x, 
int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a); } -void iamax(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void iamax(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &result) { +void iamax(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } void iamax(backend_selector selector, int64_t n, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result); } -void rotm(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& param) { oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void rotm(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +void rotm(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, 
sycl::buffer& param) { oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } -void dot(backend_selector selector, int64_t n, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(backend_selector selector, int64_t n, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& result) { oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result); } void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1367,8 +1367,8 @@ void trsm_batch(backend_selector selector, side left_right, up void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, 
int64_t stride_b, int64_t batch_size) { + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1376,8 +1376,8 @@ void trsm_batch(backend_selector selector, side left_right, up void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1386,8 +1386,8 @@ void trsm_batch(backend_selector selector, side left_right, up void trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::trsm_batch(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, @@ -1395,69 +1395,69 @@ void trsm_batch(backend_selector selector, side left_right, up } void her2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc) { + int64_t n, int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, 
int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } void her2k(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(backend_selector selector, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } -void rotg(backend_selector selector, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(backend_selector selector, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s); } void symv(backend_selector selector, uplo upper_lower, int64_t n, float 
alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void symv(backend_selector selector, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { oneapi::mkl::blas::rocblas::MAJOR::symv(selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1465,8 +1465,8 @@ void omatcopy_batch(backend_selector selector, transpose trans void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); @@ -1474,22 +1474,22 @@ void omatcopy_batch(backend_selector selector, transpose trans void omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch(selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1497,7 +1497,7 @@ void imatcopy_batch(backend_selector selector, transpose trans void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); @@ -1505,16 +1505,16 @@ void imatcopy_batch(backend_selector selector, transpose trans void imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1522,9 +1522,9 @@ void omatadd_batch(backend_selector selector, transpose transa } void omatadd_batch(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + 
std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1533,9 +1533,9 @@ void omatadd_batch(backend_selector selector, transpose transa void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1544,10 +1544,10 @@ void omatadd_batch(backend_selector selector, transpose transa void omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch(selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, @@ -1555,123 +1555,123 @@ void omatadd_batch(backend_selector selector, transpose transa } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t 
ldb) { + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - 
std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { oneapi::mkl::blas::rocblas::MAJOR::omatcopy2(selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), 
trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - 
sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } void omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } @@ -1679,239 +1679,239 @@ void omatadd(backend_selector selector, transpose transa, tran // USM APIs sycl::event syr2(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, const float *y, int64_t incy, float *a, - int64_t lda, const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, const double *y, int64_t incy, - double *a, int64_t lda, const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, const double* y, int64_t incy, + double* a, int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } 
-sycl::event scal(backend_selector selector, int64_t n, float alpha, float *x, - int64_t incx, const std::vector &dependencies) { +sycl::event scal(backend_selector selector, int64_t n, float alpha, float* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } -sycl::event scal(backend_selector selector, int64_t n, double alpha, double *x, - int64_t incx, const std::vector &dependencies) { +sycl::event scal(backend_selector selector, int64_t n, double alpha, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, std::complex alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, std::complex alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, float alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event scal(backend_selector selector, int64_t n, double alpha, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::scal(selector.get_queue(), n, alpha, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmv( selector.get_queue(), 
upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpmv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const 
float *x, int64_t incx, float *a, - const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event spr(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, double *a, - const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, double* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event hpmv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, float 
beta, - float *c, int64_t ldc, const std::vector &dependencies) { + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, double beta, - double *c, int64_t ldc, const std::vector &dependencies) { + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, double beta, + double* c, int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event syrk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, 
c, ldc, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, float *alpha, const float **a, - int64_t *lda, float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, float* alpha, const float** a, + int64_t* lda, float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, double *alpha, const double **a, - int64_t *lda, double *beta, double **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, double* alpha, const double** a, + int64_t* lda, double* beta, double** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, std::complex* alpha, + const 
std::complex** a, int64_t* lda, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event syrk_batch(backend_selector selector, uplo *upper_lower, - transpose *trans, int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event syrk_batch(backend_selector selector, uplo* upper_lower, + transpose* trans, int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); @@ -1919,10 +1919,10 @@ sycl::event syrk_batch(backend_selector selector, uplo *upper_ } sycl::event syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, int64_t n, int64_t k, float alpha, const float *a, - int64_t lda, int64_t stride_a, float beta, float *c, int64_t ldc, + transpose trans, int64_t n, int64_t k, float alpha, const float* a, + int64_t lda, int64_t stride_a, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1930,10 +1930,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l } sycl::event 
syrk_batch(backend_selector selector, uplo upper_lower, - transpose trans, int64_t n, int64_t k, double alpha, const double *a, - int64_t lda, int64_t stride_a, double beta, double *c, int64_t ldc, + transpose trans, int64_t n, int64_t k, double alpha, const double* a, + int64_t lda, int64_t stride_a, double beta, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1942,10 +1942,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex beta, std::complex *c, int64_t ldc, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1954,10 +1954,10 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l sycl::event syrk_batch(backend_selector selector, uplo upper_lower, transpose trans, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex beta, std::complex *c, int64_t ldc, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syrk_batch( selector.get_queue(), upper_lower, trans, 
n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); @@ -1965,28 +1965,28 @@ sycl::event syrk_batch(backend_selector selector, uplo upper_l } sycl::event her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event her2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event hbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -1994,120 +1994,120 @@ sycl::event hbmv(backend_selector selector, uplo upper_lower, } sycl::event 
hbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, float *x, int64_t incx, - float *y, int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, float c, float s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, 
dependencies); return done; } -sycl::event rot(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, double c, double s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rot(selector.get_queue(), n, x, incx, y, incy, c, s, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { + const float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, double *y, int64_t incy, - const std::vector &dependencies) { + const double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } sycl::event axpy(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::axpy(selector.get_queue(), n, alpha, x, incx, y, incy, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, float *alpha, - const float **x, int64_t *incx, float **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, float* alpha, + const float** x, int64_t* incx, float** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, double *alpha, - const double **x, int64_t *incx, double **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, double* alpha, + const double** x, int64_t* incx, double** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event axpy_batch(backend_selector selector, int64_t *n, - std::complex *alpha, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, + std::complex* alpha, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event 
axpy_batch(backend_selector selector, int64_t *n, - std::complex *alpha, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(backend_selector selector, int64_t* n, + std::complex* alpha, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch( selector.get_queue(), n, alpha, x, incx, y, incy, group_count, group_size, dependencies); return done; } sycl::event axpy_batch(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, int64_t stridex, float *y, int64_t incy, + const float* x, int64_t incx, int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2115,9 +2115,9 @@ sycl::event axpy_batch(backend_selector selector, int64_t n, f } sycl::event axpy_batch(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, int64_t stridex, double *y, int64_t incy, + const double* x, int64_t incx, int64_t stridex, double* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2125,9 +2125,9 @@ sycl::event axpy_batch(backend_selector selector, int64_t n, d } sycl::event axpy_batch(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const 
std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2135,9 +2135,9 @@ sycl::event axpy_batch(backend_selector selector, int64_t n, } sycl::event axpy_batch(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpy_batch(selector.get_queue(), n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); @@ -2145,61 +2145,61 @@ sycl::event axpy_batch(backend_selector selector, int64_t n, } sycl::event axpby(backend_selector selector, int64_t n, float alpha, - const float *x, int64_t incx, const float beta, float *y, int64_t incy, - const std::vector &dependencies) { + const float* x, int64_t incx, const float beta, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, double alpha, - const double *x, int64_t incx, const double beta, double *y, int64_t incy, - const std::vector &dependencies) { + const double* x, int64_t incx, const double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event 
axpby(backend_selector selector, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event axpby(backend_selector selector, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::axpby(selector.get_queue(), n, alpha, x, incx, beta, y, incy, dependencies); return done; } sycl::event gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event gerc(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gerc(selector.get_queue(), m, n, alpha, x, 
incx, y, incy, a, lda, dependencies); return done; } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2207,9 +2207,9 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2217,10 +2217,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, 
beta, c, ldc, dependencies); @@ -2228,10 +2228,10 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event syr2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2239,47 +2239,47 @@ sycl::event syr2k(backend_selector selector, uplo upper_lower, } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, 
int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv(backend_selector selector, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv(selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stridea, - const float *x, int64_t incx, int64_t stridex, float beta, float *y, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stridea, + const float* x, int64_t incx, int64_t stridex, float beta, float* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2287,10 +2287,10 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, 
int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stridea, - const double *x, int64_t incx, int64_t stridex, double beta, double *y, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stridea, + const double* x, int64_t incx, int64_t stridex, double beta, double* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2298,11 +2298,11 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, + int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); @@ -2310,55 +2310,55 @@ sycl::event gemv_batch(backend_selector selector, transpose tr } sycl::event gemv_batch(backend_selector selector, transpose trans, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, + int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, 
int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, float *alpha, const float **a, int64_t *lda, const float **x, - int64_t *incx, float *beta, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, float* alpha, const float** a, int64_t* lda, const float** x, + int64_t* incx, float* beta, float** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, double *alpha, const double **a, int64_t *lda, const double **x, - int64_t *incx, double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, double* alpha, const double** a, int64_t* lda, const double** x, + int64_t* incx, double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const 
std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); return done; } -sycl::event gemv_batch(backend_selector selector, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(backend_selector selector, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemv_batch( selector.get_queue(), trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); @@ -2366,9 +2366,9 @@ sycl::event gemv_batch(backend_selector selector, transpose *t } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const float *a, int64_t lda, int64_t stridea, const float *x, - int64_t incx, int64_t stridex, float *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { + int64_t n, const float* a, int64_t lda, int64_t stridea, const 
float* x, + int64_t incx, int64_t stridex, float* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2376,9 +2376,9 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const double *a, int64_t lda, int64_t stridea, const double *x, - int64_t incx, int64_t stridex, double *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { + int64_t n, const double* a, int64_t lda, int64_t stridea, const double* x, + int64_t incx, int64_t stridex, double* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2386,10 +2386,10 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + int64_t n, const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); @@ -2397,52 +2397,52 @@ sycl::event dgmm_batch(backend_selector selector, side left_ri } sycl::event 
dgmm_batch(backend_selector selector, side left_right, int64_t m, - int64_t n, const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + int64_t n, const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const float **a, int64_t *lda, const float **x, int64_t *incx, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const float** a, int64_t* lda, const float** x, int64_t* incx, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const double **a, int64_t *lda, const double **x, int64_t *incx, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const double** a, int64_t* lda, const double** x, int64_t* incx, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event dgmm_batch(backend_selector selector, side *left_right, int64_t *m, - int64_t *n, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(backend_selector selector, side* left_right, int64_t* m, + int64_t* n, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dgmm_batch( selector.get_queue(), left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); @@ -2450,151 +2450,151 @@ sycl::event dgmm_batch(backend_selector selector, side *left_r } sycl::event her(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const std::complex *x, int64_t incx, std::complex *a, - int64_t lda, const std::vector &dependencies) { + float alpha, const std::complex* x, int64_t incx, 
std::complex* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event her(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const std::complex *x, int64_t incx, std::complex *a, - int64_t lda, const std::vector &dependencies) { + double alpha, const std::complex* x, int64_t incx, std::complex* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies) { + float alpha, const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } sycl::event hpr(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies) { + double alpha, const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, dependencies); return done; } -sycl::event iamin(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamin(backend_selector selector, int64_t n, const double *x, - 
int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamin(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamin(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamin(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const float **a, int64_t *lda, const float **b, int64_t *ldb, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const float** a, int64_t* lda, const float** b, int64_t* ldb, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose 
*transb, int64_t *m, int64_t *n, int64_t *k, double *alpha, - const double **a, int64_t *lda, const double **b, int64_t *ldb, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, double* alpha, + const double** a, int64_t* lda, const double** b, int64_t* ldb, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** b, int64_t* ldb, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - 
int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** b, int64_t* ldb, std::complex* beta, + std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, sycl::half *alpha, - const sycl::half **a, int64_t *lda, const sycl::half **b, int64_t *ldb, - sycl::half *beta, sycl::half **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, sycl::half* alpha, + const sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb, + sycl::half* beta, sycl::half** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const sycl::half **a, int64_t *lda, const sycl::half **b, int64_t *ldb, - float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const 
sycl::half** a, int64_t* lda, const sycl::half** b, int64_t* ldb, + float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const std::int8_t **a, int64_t *lda, const std::int8_t **b, int64_t *ldb, - float *beta, float **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb, + float* beta, float** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); return done; } -sycl::event gemm_batch(backend_selector selector, transpose *transa, - transpose *transb, int64_t *m, int64_t *n, int64_t *k, float *alpha, - const std::int8_t **a, int64_t *lda, const std::int8_t **b, int64_t *ldb, - float *beta, std::int32_t **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(backend_selector selector, transpose* transa, + transpose* transb, int64_t* m, int64_t* n, int64_t* k, float* alpha, + const std::int8_t** a, int64_t* lda, const std::int8_t** b, int64_t* ldb, + float* beta, std::int32_t** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( 
selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); @@ -2603,9 +2603,9 @@ sycl::event gemm_batch(backend_selector selector, transpose *t sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *b, int64_t ldb, - int64_t stride_b, float beta, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const float* a, int64_t lda, int64_t stride_a, const float* b, int64_t ldb, + int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2614,9 +2614,9 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *b, int64_t ldb, - int64_t stride_b, double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const double* a, int64_t lda, int64_t stride_a, const double* b, int64_t ldb, + int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2625,11 +2625,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, 
int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2638,11 +2638,11 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2651,10 +2651,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, sycl::half alpha, - const sycl::half *a, int64_t lda, int64_t stride_a, const sycl::half *b, - int64_t ldb, int64_t stride_b, sycl::half beta, sycl::half *c, int64_t ldc, + const sycl::half* a, int64_t lda, int64_t stride_a, const sycl::half* b, + 
int64_t ldb, int64_t stride_b, sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2663,10 +2663,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const sycl::half *a, int64_t lda, int64_t stride_a, const sycl::half *b, - int64_t ldb, int64_t stride_b, float beta, float *c, int64_t ldc, + const sycl::half* a, int64_t lda, int64_t stride_a, const sycl::half* b, + int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2675,10 +2675,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, int64_t stride_a, const std::int8_t *b, - int64_t ldb, int64_t stride_b, float beta, float *c, int64_t ldc, + const std::int8_t* a, int64_t lda, int64_t stride_a, const std::int8_t* b, + int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, 
batch_size, dependencies); @@ -2687,10 +2687,10 @@ sycl::event gemm_batch(backend_selector selector, transpose tr sycl::event gemm_batch(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, float alpha, - const std::int8_t *a, int64_t lda, int64_t stride_a, const std::int8_t *b, - int64_t ldb, int64_t stride_b, float beta, std::int32_t *c, int64_t ldc, + const std::int8_t* a, int64_t lda, int64_t stride_a, const std::int8_t* b, + int64_t ldb, int64_t stride_b, float beta, std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_batch( selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); @@ -2698,103 +2698,103 @@ sycl::event gemm_batch(backend_selector selector, transpose tr } sycl::event spmv(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *a, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } sycl::event spmv(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *a, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { + double alpha, const double* a, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spmv(selector.get_queue(), upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, 
int64_t n, float *x, int64_t incx, - float *y, int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event swap(backend_selector selector, int64_t n, std::complex *x, - int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event swap(backend_selector selector, int64_t n, std::complex* x, + int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::swap(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, 
int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event geru(backend_selector selector, int64_t m, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::geru(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event nrm2(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event nrm2(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(backend_selector selector, int64_t n, const float* x, + int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event nrm2(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *result, const std::vector &dependencies) { 
+sycl::event nrm2(backend_selector selector, int64_t n, const double* x, + int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::nrm2(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const float* a, int64_t lda, + const float* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2802,9 +2802,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, double alpha, const double* a, int64_t lda, + const double* b, int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2813,9 +2813,9 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, 
std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2824,9 +2824,9 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm(backend_selector selector, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2834,9 +2834,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, sycl::half alpha, const sycl::half *a, - int64_t lda, const sycl::half *b, int64_t ldb, sycl::half beta, sycl::half *c, - int64_t ldc, const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, sycl::half alpha, const sycl::half* a, + int64_t lda, const sycl::half* b, int64_t ldb, sycl::half beta, sycl::half* c, + int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2844,9 +2844,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const sycl::half *a, int64_t lda, - const sycl::half *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector 
&dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const sycl::half* a, int64_t lda, + const sycl::half* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2854,9 +2854,9 @@ sycl::event gemm(backend_selector selector, transpose transa, } sycl::event gemm(backend_selector selector, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, float alpha, const bfloat16 *a, int64_t lda, - const bfloat16 *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, int64_t k, float alpha, const bfloat16* a, int64_t lda, + const bfloat16* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm(selector.get_queue(), transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -2865,10 +2865,10 @@ sycl::event gemm(backend_selector selector, transpose transa, sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, int64_t lda, std::int8_t ao, + const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2877,10 +2877,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event 
gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::int8_t *a, int64_t lda, std::int8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, int64_t lda, std::int8_t ao, + const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2889,10 +2889,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::int8_t *b, int64_t ldb, std::int8_t bo, float beta, - std::int32_t *c, int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::uint8_t* a, int64_t lda, std::uint8_t ao, + const std::int8_t* b, int64_t ldb, std::int8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2901,10 +2901,10 @@ sycl::event gemm_bias(backend_selector selector, transpose tra sycl::event gemm_bias(backend_selector selector, transpose transa, transpose transb, offset offsetc, int64_t m, int64_t n, int64_t k, - float alpha, const std::uint8_t *a, int64_t lda, std::uint8_t ao, - const std::uint8_t *b, int64_t ldb, std::uint8_t bo, float beta, - std::int32_t *c, int64_t ldc, const 
std::int32_t *co, - const std::vector &dependencies) { + float alpha, const std::uint8_t* a, int64_t lda, std::uint8_t ao, + const std::uint8_t* b, int64_t ldb, std::uint8_t bo, float beta, + std::int32_t* c, int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemm_bias( selector.get_queue(), transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); @@ -2912,42 +2912,42 @@ sycl::event gemm_bias(backend_selector selector, transpose tra } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, float alpha, const std::complex *a, int64_t lda, - float beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, float alpha, const std::complex* a, int64_t lda, + float beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event herk(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, double alpha, const std::complex *a, int64_t lda, - double beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, double alpha, const std::complex* a, int64_t lda, + double beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::herk( selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); return done; } sycl::event ger(backend_selector selector, int64_t m, int64_t n, float alpha, - const float *x, int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies) { + const float* x, int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event ger(backend_selector selector, int64_t m, int64_t n, double alpha, - const double *x, int64_t incx, const double *y, int64_t incy, double *a, - int64_t lda, const std::vector &dependencies) { + const double* x, int64_t incx, const double* y, int64_t incy, double* a, + int64_t lda, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::ger(selector.get_queue(), m, n, alpha, x, incx, y, incy, a, lda, dependencies); return done; } sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb, const std::vector &dependencies) { + transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -2956,8 +2956,8 @@ sycl::event trsm(backend_selector selector, side left_right, u sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -2966,8 +2966,8 @@ sycl::event trsm(backend_selector selector, side left_right, u sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex 
*a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -2976,8 +2976,8 @@ sycl::event trsm(backend_selector selector, side left_right, u sycl::event trsm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -2986,9 +2986,9 @@ sycl::event trsm(backend_selector selector, side left_right, u sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, int64_t stride_a, float *b, + float alpha, const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -2997,9 +2997,9 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, int64_t stride_a, double *b, + double alpha, const double* a, int64_t lda, 
int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3008,9 +3008,9 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3019,53 +3019,53 @@ sycl::event trsm_batch(backend_selector selector, side left_ri sycl::event trsm_batch(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector 
selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* 
upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); return done; } -sycl::event trsm_batch(backend_selector selector, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(backend_selector selector, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsm_batch( selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); @@ -3073,28 +3073,28 @@ sycl::event trsm_batch(backend_selector selector, side *left_r } sycl::event dotu(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotu(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, 
- const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dotu(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3102,10 +3102,10 @@ sycl::event hemm(backend_selector selector, side left_right, u } sycl::event hemm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hemm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3113,27 +3113,27 @@ sycl::event hemm(backend_selector selector, side left_right, u } sycl::event hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, - const std::vector 
&dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event hpr2(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *a, - const std::vector &dependencies) { + std::complex alpha, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hpr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, float alpha, const float *a, int64_t lda, const float *x, - int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, float alpha, const float* a, int64_t lda, const float* x, + int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3141,9 +3141,9 @@ sycl::event gbmv(backend_selector selector, transpose trans, i } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, double alpha, const double *a, int64_t lda, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, double alpha, const double* a, int64_t lda, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3151,10 +3151,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, i } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3162,10 +3162,10 @@ sycl::event gbmv(backend_selector selector, transpose trans, i } sycl::event gbmv(backend_selector selector, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gbmv(selector.get_queue(), trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3173,43 +3173,43 @@ sycl::event gbmv(backend_selector selector, transpose trans, i } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const float *a, int64_t lda, float *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x, + 
int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const double *a, int64_t lda, double *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbmv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbmv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const 
std::vector &dependencies) { + int64_t m, int64_t n, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3217,9 +3217,9 @@ sycl::event symm(backend_selector selector, side left_right, u } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3227,10 +3227,10 @@ sycl::event symm(backend_selector selector, side left_right, u } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3238,10 +3238,10 @@ sycl::event symm(backend_selector selector, side left_right, u } sycl::event symm(backend_selector selector, side left_right, uplo upper_lower, - int64_t m, int64_t n, std::complex alpha, const std::complex *a, - 
int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t m, int64_t n, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symm(selector.get_queue(), left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3249,42 +3249,42 @@ sycl::event symm(backend_selector selector, side left_right, u } sycl::event dotc(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event dotc(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dotc(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, float *a, int64_t lda, - const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, float* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event syr(backend_selector selector, uplo upper_lower, 
int64_t n, - double alpha, const double *x, int64_t incx, double *a, int64_t lda, - const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, double* a, int64_t lda, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::syr(selector.get_queue(), upper_lower, n, alpha, x, incx, a, lda, dependencies); return done; } sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb, const std::vector &dependencies) { + transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3293,8 +3293,8 @@ sycl::event trmm(backend_selector selector, side left_right, u sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3303,8 +3303,8 @@ sycl::event trmm(backend_selector selector, side left_right, u sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); @@ -3313,218 +3313,218 @@ sycl::event trmm(backend_selector selector, side left_right, u sycl::event trmm(backend_selector selector, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trmm(selector.get_queue(), left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); return done; } -sycl::event rotmg(backend_selector selector, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } -sycl::event rotmg(backend_selector selector, double *d1, double *d2, double *x1, - double y1, double *param, const std::vector &dependencies) { +sycl::event rotmg(backend_selector selector, double* d1, double* d2, double* x1, + double y1, double* param, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotmg(selector.get_queue(), d1, d2, x1, y1, param, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, 
n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const double *a, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event tpsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tpsv(selector.get_queue(), upper_lower, trans, unit_diag, n, a, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, 
int64_t n, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } sycl::event trsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::trsv( selector.get_queue(), upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); return done; } -sycl::event copy(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector selector, int64_t n, const float* x, + int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event copy(backend_selector 
selector, int64_t n, const double* x, + int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } sycl::event copy(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy(selector.get_queue(), n, x, incx, y, incy, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, const float **x, - int64_t *incx, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, const float** x, + int64_t* incx, float** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { 
auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t *n, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t* n, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, y, incy, group_count, group_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t stridex, float* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } -sycl::event copy_batch(backend_selector selector, int64_t n, const double 
*x, - int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event copy_batch(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::copy_batch( selector.get_queue(), n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* 
y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event hemv(backend_selector selector, uplo upper_lower, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::hemv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { + transpose transb, int64_t n, int64_t k, float alpha, const float* a, int64_t lda, + const float* b, int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3532,9 +3532,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, - transpose transb, int64_t n, int64_t k, double alpha, const double *a, - int64_t lda, const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { + transpose transb, int64_t n, int64_t k, double alpha, const double* a, + int64_t lda, const double* b, int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { auto done = 
oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3543,9 +3543,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3554,9 +3554,9 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, sycl::event gemmt(backend_selector selector, uplo upper_lower, transpose transa, transpose transb, int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, const std::complex* b, + int64_t ldb, std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::gemmt(selector.get_queue(), upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3564,8 +3564,8 @@ sycl::event gemmt(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *x, int64_t incx, - float beta, float *y, int64_t incy, const std::vector &dependencies) { + int64_t k, float alpha, const float* a, int64_t lda, const float* x, int64_t incx, + float beta, float* 
y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3573,9 +3573,9 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, } sycl::event sbmv(backend_selector selector, uplo upper_lower, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *x, - int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { + int64_t k, double alpha, const double* a, int64_t lda, const double* x, + int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::sbmv(selector.get_queue(), upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); @@ -3583,174 +3583,174 @@ sycl::event sbmv(backend_selector selector, uplo upper_lower, } sycl::event asum(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, float *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event asum(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, double *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, int64_t n, const float *x, - int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, int64_t n, const float* x, + int64_t incx, float* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), 
n, x, incx, result, dependencies); return done; } -sycl::event asum(backend_selector selector, int64_t n, const double *x, - int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(backend_selector selector, int64_t n, const double* x, + int64_t incx, double* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::asum(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const float *a, int64_t lda, float *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const float* a, int64_t lda, float* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const double *a, int64_t lda, double *x, - int64_t incx, const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const double* a, int64_t lda, double* x, + int64_t incx, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event 
tbsv(backend_selector selector, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::tbsv( selector.get_queue(), upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *x, int64_t incx, const float *y, int64_t incy, float *a, - const std::vector &dependencies) { + float alpha, const float* x, int64_t incx, const float* y, int64_t incy, float* a, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } sycl::event spr2(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *x, int64_t incx, const double *y, int64_t incy, - double *a, const std::vector &dependencies) { + double alpha, const double* x, int64_t incx, const double* y, int64_t incy, + double* a, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::spr2(selector.get_queue(), upper_lower, n, alpha, x, incx, y, incy, a, dependencies); return done; } -sycl::event iamax(backend_selector selector, int64_t n, const float *x, - int64_t incx, int64_t *result, const std::vector &dependencies) { +sycl::event iamax(backend_selector selector, int64_t n, const float* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event iamax(backend_selector selector, int64_t n, const double *x, - int64_t incx, int64_t *result, const std::vector 
&dependencies) { +sycl::event iamax(backend_selector selector, int64_t n, const double* x, + int64_t incx, int64_t* result, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } sycl::event iamax(backend_selector selector, int64_t n, - const std::complex *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { + const std::complex* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::iamax(selector.get_queue(), n, x, incx, result, dependencies); return done; } -sycl::event rotm(backend_selector selector, int64_t n, float *x, int64_t incx, - float *y, int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, int64_t n, float* x, int64_t incx, + float* y, int64_t incy, float* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotm(backend_selector selector, int64_t n, double *x, int64_t incx, - double *y, int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(backend_selector selector, int64_t n, double* x, int64_t incx, + double* y, int64_t incy, double* param, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotm(selector.get_queue(), n, x, incx, y, incy, param, dependencies); return done; } -sycl::event rotg(backend_selector selector, float *a, float *b, float *c, - float 
*s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event rotg(backend_selector selector, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(backend_selector selector, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::rotg(selector.get_queue(), a, b, c, s, dependencies); return done; } -sycl::event sdsdot(backend_selector selector, int64_t n, float sb, const float *x, - int64_t incx, const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(backend_selector selector, int64_t n, float sb, const float* x, + int64_t incx, const float* y, int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::sdsdot(selector.get_queue(), n, sb, x, incx, y, incy, result, dependencies); return done; } sycl::event her2k(backend_selector 
selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); @@ -3758,62 +3758,62 @@ sycl::event her2k(backend_selector selector, uplo upper_lower, } sycl::event her2k(backend_selector selector, uplo upper_lower, transpose trans, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::her2k(selector.get_queue(), upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const float *x, - int64_t incx, const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, int64_t n, const float* x, + int64_t incx, const float* y, int64_t incy, float* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const double *x, - int64_t incx, const double *y, int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event 
dot(backend_selector selector, int64_t n, const double* x, + int64_t incx, const double* y, int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } -sycl::event dot(backend_selector selector, int64_t n, const float *x, - int64_t incx, const float *y, int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(backend_selector selector, int64_t n, const float* x, + int64_t incx, const float* y, int64_t incy, double* result, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::dot(selector.get_queue(), n, x, incx, y, incy, result, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, int64_t n, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies) { + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event symv(backend_selector selector, uplo upper_lower, int64_t n, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::symv( selector.get_queue(), upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); return done; } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t 
stride_a, float *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3821,10 +3821,10 @@ sycl::event omatcopy_batch(backend_selector selector, transpos } sycl::event omatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3833,9 +3833,9 @@ sycl::event omatcopy_batch(backend_selector selector, transpos sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3844,9 +3844,9 @@ sycl::event 
omatcopy_batch(backend_selector selector, transpos sycl::event omatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); @@ -3854,18 +3854,18 @@ sycl::event omatcopy_batch(backend_selector selector, transpos } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; } sycl::event imatcopy_batch(backend_selector selector, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3873,9 +3873,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event 
imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3883,9 +3883,9 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch( selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); return done; @@ -3893,10 +3893,10 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3905,10 +3905,10 @@ sycl::event 
omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3917,11 +3917,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3930,11 +3930,11 @@ sycl::event omatadd_batch(backend_selector selector, transpose sycl::event omatadd_batch(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, 
const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd_batch( selector.get_queue(), transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); @@ -3942,115 +3942,115 @@ sycl::event omatadd_batch(backend_selector selector, transpose } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector 
&dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy(selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, 
b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event omatcopy2(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy2( selector.get_queue(), trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& 
dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event imatcopy(backend_selector selector, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, dependencies); return done; } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float beta, const float *b, std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float beta, const float* b, std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4058,9 +4058,9 @@ sycl::event omatadd(backend_selector selector, transpose trans } sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - double beta, const double *b, std::int64_t ldb, double *c, std::int64_t 
ldc, - const std::vector &dependencies) { + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + double beta, const double* b, std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4069,9 +4069,9 @@ sycl::event omatadd(backend_selector selector, transpose trans sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4080,9 +4080,9 @@ sycl::event omatadd(backend_selector selector, transpose trans sycl::event omatadd(backend_selector selector, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatadd(selector.get_queue(), transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); @@ -4115,8 +4115,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpos std::int64_t* m, std::int64_t* n, std::complex* alpha, const 
std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4127,8 +4126,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpos std::int64_t* m, std::int64_t* n, std::complex* alpha, const std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::omatcopy_batch( selector.get_queue(), trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); @@ -4138,8 +4136,7 @@ sycl::event omatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); @@ -4149,8 +4146,7 @@ sycl::event imatcopy_batch(backend_selector selector, transpos sycl::event imatcopy_batch(backend_selector selector, transpose* trans, std::int64_t* m, std::int64_t* n, double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t* groupsize, - const std::vector& dependencies) { + std::int64_t* groupsize, const std::vector& dependencies) { auto done = oneapi::mkl::blas::rocblas::MAJOR::imatcopy_batch(selector.get_queue(), trans, m, n, alpha, ab, 
lda, ldb, group_count, groupsize, dependencies); diff --git a/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx b/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx index 70aabaaf9..f6c3eeee5 100644 --- a/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx +++ b/include/oneapi/mkl/blas/detail/rocblas/onemkl_blas_rocblas.hxx @@ -21,2102 +21,2102 @@ // Buffer APIs -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void axpy(sycl::queue &queue, int64_t n, 
std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - 
double beta, sycl::buffer &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t 
batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue 
&queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, 
int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, float c, float s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, 
int64_t incy, float c, float s); -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, double c, double s); +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, 
sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx); +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer, 1>& x, int64_t incx); -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer, 1>& x, int64_t incx); -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result); +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result); -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, 
int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy); +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); 
+void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer 
&a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, float beta, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, double beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size); +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, double beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, 
int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size); +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stridea, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &c, int64_t ldc, int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stridea, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& c, int64_t ldc, int64_t stridec, int64_t batch_size); -void ger(sycl::queue &queue, int64_t m, int64_t n, 
float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void hemv(sycl::queue &queue, uplo 
upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy); - -void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda); - -void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda); - -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t 
incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> 
&x, int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float 
alpha, sycl::buffer &a, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, int64_t incx, double beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, int64_t incx, double beta, sycl::buffer& y, int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a); -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, 
int64_t incx, float beta, + sycl::buffer& y, int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda); -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose 
trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tbsv(sycl::queue 
&queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, sycl::buffer &x, int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, + sycl::buffer& a, sycl::buffer& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose 
trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - sycl::half alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - 
std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer, 1> &a, int64_t lda, double beta, - sycl::buffer, 1> &c, int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - double beta, sycl::buffer &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, 
sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, float beta, sycl::buffer &c, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void 
gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + sycl::half alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer, 1>& a, int64_t lda, double beta, + sycl::buffer, 1>& c, int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t 
ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + double beta, sycl::buffer& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, float beta, sycl::buffer& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, double beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, double beta, sycl::buffer& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t 
lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, 
sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc); +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + 
std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb); +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trsm(sycl::queue& queue, side 
left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, double beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); - -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, 
int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, - int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); + +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, 
int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + 
int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + 
sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); - -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - 
sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co); - -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); - -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size); - -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); + +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + 
sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size); + +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size); -void 
imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, 
int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, 
- sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& 
a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, 
int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc); // USM APIs -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& 
dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float 
**x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float 
alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - const float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + const float beta, float* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const 
double *x, int64_t incx, - const double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + const double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const 
std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = 
{}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t stridex, - float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t stridex, + float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t stridex, std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t stridex, std::complex* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, float *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, float* 
result, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, const double *y, - int64_t incy, double *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const double* x, int64_t incx, const double* y, + int64_t incy, double* result, const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const 
std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - 
int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, 
std::complex *x, int64_t incx, - std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float c, float s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float c, float s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double c, double s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double c, double s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event 
rotm(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float *param, const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float* param, const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double *param, const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double* param, const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, float* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, double* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t 
n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); 
-sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, const double *a, int64_t lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t 
kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stridea, const float *x, int64_t incx, - int64_t stridex, float beta, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stridea, const float* x, int64_t incx, + int64_t stridex, float beta, float* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stridea, const double *x, int64_t incx, - int64_t stridex, double beta, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stridea, const double* x, int64_t incx, + int64_t stridex, double beta, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, int64_t stridex, - std::complex beta, std::complex *y, int64_t incy, 
+sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, int64_t stridex, + std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex 
*alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stridea, const float *x, int64_t incx, int64_t stridex, - float *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stridea, const double *x, int64_t incx, int64_t stridex, - double *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stridea, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, 
int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const float *x, int64_t incx, - const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, 
int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - 
-sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t 
n, float alpha, const float *x, - int64_t incx, float *a, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const 
float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event 
tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event 
trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose 
transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, sycl::half beta, sycl::half *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, float beta, 
std::complex *c, - int64_t ldc, const std::vector &dependencies = {}); - -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, double beta, std::complex *c, - int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, 
int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, 
int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stridea, const float* x, int64_t incx, int64_t stridex, + float* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, 
int64_t stridea, const double* x, int64_t incx, int64_t stridex, + double* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stridea, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* c, int64_t ldc, int64_t stridec, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, const float* x, int64_t incx, + const float* y, 
int64_t incy, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = 
{}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& 
queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* a, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, + const 
std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + int64_t lda, const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* a, + int64_t lda, const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, 
+ int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, 
transpose trans, diag unit_diag, int64_t n, + const double* a, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, 
uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, + float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, sycl::half beta, sycl::half* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t 
lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, float beta, std::complex* c, + int64_t ldc, const std::vector& dependencies = {}); + +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, double beta, std::complex* c, + int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side 
left_right, uplo upper_lower, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + 
+sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, 
std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies = {}); - -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, + const 
std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies = {}); + +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side 
left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies = {}); +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies = {}); +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, 
diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t 
stride_b, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, - int64_t *lda, float **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, - int64_t *lda, double **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, 
int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t 
*ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *b, int64_t ldb, int64_t stride_b, float beta, float *c, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, 
int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const 
sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *b, int64_t ldb, int64_t stride_b, double beta, double *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, 
std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, + int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& 
dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - 
int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, - int64_t lda, std::int8_t ao, const std::uint8_t *b, int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t *a, - int64_t lda, std::int8_t ao, const std::int8_t *b, int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t *a, - int64_t lda, std::uint8_t ao, const std::int8_t *b, int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, 
float alpha, const std::uint8_t *a, - int64_t lda, std::uint8_t ao, const std::uint8_t *b, int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& 
queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::int8_t* a, + int64_t lda, std::int8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::int8_t* b, int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const std::uint8_t* a, + int64_t lda, std::uint8_t ao, const std::uint8_t* b, int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t 
lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies = {}); +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, 
double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const 
double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - 
-sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, 
transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies 
= {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies = {}); + 
+sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies = {}); sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, diff --git a/include/oneapi/mkl/detail/exceptions.hpp b/include/oneapi/mkl/detail/exceptions.hpp index 7767c2ac3..18eeca5b1 100644 --- a/include/oneapi/mkl/detail/exceptions.hpp +++ b/include/oneapi/mkl/detail/exceptions.hpp @@ -31,14 +31,14 @@ namespace mkl { class backend_not_found : public oneapi::mkl::exception { public: - backend_not_found(const std::string &info = "") + backend_not_found(const std::string& info = "") : oneapi::mkl::exception( "", "", ((info.length() != 0) ? info : "Couldn't load selected backend")) {} }; class function_not_found : public oneapi::mkl::exception { public: - function_not_found(const std::string &info = "") + function_not_found(const std::string& info = "") : oneapi::mkl::exception( "", "", ((info.length() != 0) ? 
info : "Couldn't load functions from selected backend")) { @@ -47,7 +47,7 @@ class function_not_found : public oneapi::mkl::exception { class specification_mismatch : public oneapi::mkl::exception { public: - specification_mismatch(const std::string &info = "") + specification_mismatch(const std::string& info = "") : oneapi::mkl::exception( "", "", ((info.length() != 0) ? info : "Loaded oneMKL specification version mismatch")) {} diff --git a/include/oneapi/mkl/detail/get_device_id.hpp b/include/oneapi/mkl/detail/get_device_id.hpp index 32c30f1cb..2eb9f07c9 100644 --- a/include/oneapi/mkl/detail/get_device_id.hpp +++ b/include/oneapi/mkl/detail/get_device_id.hpp @@ -40,7 +40,7 @@ namespace oneapi { namespace mkl { -inline oneapi::mkl::device get_device_id(sycl::queue &queue) { +inline oneapi::mkl::device get_device_id(sycl::queue& queue) { oneapi::mkl::device device_id; if (queue.get_device().is_cpu()) device_id = device::x86cpu; diff --git a/include/oneapi/mkl/dft/backward.hpp b/include/oneapi/mkl/dft/backward.hpp index 3cd03e13b..becca85d0 100644 --- a/include/oneapi/mkl/dft/backward.hpp +++ b/include/oneapi/mkl/dft/backward.hpp @@ -33,7 +33,7 @@ namespace oneapi::mkl::dft { //In-place transform template -void compute_backward(descriptor_type &desc, sycl::buffer &inout) { +void compute_backward(descriptor_type& desc, sycl::buffer& inout) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -46,8 +46,8 @@ void compute_backward(descriptor_type &desc, sycl::buffer &inout) //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -void compute_backward(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) { +void compute_backward(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -61,8 +61,8 @@ void compute_backward(descriptor_type &desc, 
sycl::buffer &inout_r //Out-of-place transform template -void compute_backward(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) { +void compute_backward(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -79,9 +79,9 @@ void compute_backward(descriptor_type &desc, sycl::buffer &in, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -void compute_backward(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, sycl::buffer &out_re, - sycl::buffer &out_im) { +void compute_backward(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, sycl::buffer& out_re, + sycl::buffer& out_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -104,34 +104,32 @@ void compute_backward(descriptor_type &desc, sycl::buffer &in_re, //In-place transform template -sycl::event compute_backward(descriptor_type &desc, data_type *inout, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, data_type* inout, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using fwd_type = typename detail::descriptor_info::forward_type; - return get_commit(desc)->backward_ip_cc(desc, reinterpret_cast(inout), - dependencies); + return get_commit(desc)->backward_ip_cc(desc, reinterpret_cast(inout), dependencies); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -sycl::event compute_backward(descriptor_type &desc, data_type *inout_re, data_type *inout_im, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, data_type* inout_re, data_type* inout_im, + const std::vector& 
dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->backward_ip_rr(desc, reinterpret_cast(inout_re), - reinterpret_cast(inout_im), - dependencies); + return get_commit(desc)->backward_ip_rr(desc, reinterpret_cast(inout_re), + reinterpret_cast(inout_im), dependencies); } //Out-of-place transform template -sycl::event compute_backward(descriptor_type &desc, input_type *in, output_type *out, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, input_type* in, output_type* out, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -139,25 +137,25 @@ sycl::event compute_backward(descriptor_type &desc, input_type *in, output_type using fwd_type = typename detail::descriptor_info::forward_type; using bwd_type = typename detail::descriptor_info::backward_type; - return get_commit(desc)->backward_op_cc(desc, reinterpret_cast(in), - reinterpret_cast(out), dependencies); + return get_commit(desc)->backward_op_cc(desc, reinterpret_cast(in), + reinterpret_cast(out), dependencies); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -sycl::event compute_backward(descriptor_type &desc, input_type *in_re, input_type *in_im, - output_type *out_re, output_type *out_im, - const std::vector &dependencies = {}) { +sycl::event compute_backward(descriptor_type& desc, input_type* in_re, input_type* in_im, + output_type* out_re, output_type* out_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return 
get_commit(desc)->backward_op_rr(desc, reinterpret_cast(in_re), - reinterpret_cast(in_im), - reinterpret_cast(out_re), - reinterpret_cast(out_im), dependencies); + return get_commit(desc)->backward_op_rr(desc, reinterpret_cast(in_re), + reinterpret_cast(in_im), + reinterpret_cast(out_re), + reinterpret_cast(out_im), dependencies); } } // namespace oneapi::mkl::dft diff --git a/include/oneapi/mkl/dft/detail/commit_impl.hpp b/include/oneapi/mkl/dft/detail/commit_impl.hpp index 9e827f357..0c1a1e0b2 100644 --- a/include/oneapi/mkl/dft/detail/commit_impl.hpp +++ b/include/oneapi/mkl/dft/detail/commit_impl.hpp @@ -54,18 +54,18 @@ class commit_impl { public: commit_impl(sycl::queue queue, mkl::backend backend, - const dft::detail::dft_values &config_values) + const dft::detail::dft_values& config_values) : queue_(queue), backend_(backend), external_workspace_helper_(config_values.workspace_placement == dft::detail::config_value::WORKSPACE_EXTERNAL) {} // rule of three - commit_impl(const commit_impl &other) = delete; - commit_impl &operator=(const commit_impl &other) = delete; + commit_impl(const commit_impl& other) = delete; + commit_impl& operator=(const commit_impl& other) = delete; virtual ~commit_impl() = default; - sycl::queue &get_queue() noexcept { + sycl::queue& get_queue() noexcept { return queue_; } @@ -73,9 +73,9 @@ class commit_impl { return backend_; } - virtual void *get_handle() noexcept = 0; + virtual void* get_handle() noexcept = 0; - virtual void commit(const dft_values &) = 0; + virtual void commit(const dft_values&) = 0; inline std::int64_t get_workspace_external_bytes() { return external_workspace_helper_.get_rqd_workspace_bytes(*this); @@ -87,54 +87,54 @@ class commit_impl { // When not overridden, external workspace support is faked: an external workspace can be set, // and errors will be generated according to the specificiation, // but the required workspace size will always be zero, and any given workspace will not actually be used. 
- virtual void set_workspace(scalar_type *usm_workspace) { + virtual void set_workspace(scalar_type* usm_workspace) { external_workspace_helper_.set_workspace_throw(*this, usm_workspace); } - virtual void set_workspace(sycl::buffer &buffer_workspace) { + virtual void set_workspace(sycl::buffer& buffer_workspace) { external_workspace_helper_.set_workspace_throw(*this, buffer_workspace); } - virtual void forward_ip_cc(descriptor_type &desc, sycl::buffer &inout) = 0; - virtual void forward_ip_rr(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) = 0; - virtual void forward_op_cc(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) = 0; - virtual void forward_op_rr(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, - sycl::buffer &out_re, - sycl::buffer &out_im) = 0; - - virtual sycl::event forward_ip_cc(descriptor_type &desc, fwd_type *inout, - const std::vector &dependencies) = 0; - virtual sycl::event forward_ip_rr(descriptor_type &desc, scalar_type *inout_re, - scalar_type *inout_im, - const std::vector &dependencies) = 0; - virtual sycl::event forward_op_cc(descriptor_type &desc, fwd_type *in, bwd_type *out, - const std::vector &dependencies) = 0; - virtual sycl::event forward_op_rr(descriptor_type &desc, scalar_type *in_re, scalar_type *in_im, - scalar_type *out_re, scalar_type *out_im, - const std::vector &dependencies) = 0; - - virtual void backward_ip_cc(descriptor_type &desc, sycl::buffer &inout) = 0; - virtual void backward_ip_rr(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) = 0; - virtual void backward_op_cc(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) = 0; - virtual void backward_op_rr(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, - sycl::buffer &out_re, - sycl::buffer &out_im) = 0; - - virtual sycl::event backward_ip_cc(descriptor_type &desc, fwd_type *inout, - const std::vector &dependencies) = 0; - virtual sycl::event 
backward_ip_rr(descriptor_type &desc, scalar_type *inout_re, - scalar_type *inout_im, - const std::vector &dependencies) = 0; - virtual sycl::event backward_op_cc(descriptor_type &desc, bwd_type *in, fwd_type *out, - const std::vector &dependencies) = 0; - virtual sycl::event backward_op_rr(descriptor_type &desc, scalar_type *in_re, - scalar_type *in_im, scalar_type *out_re, scalar_type *out_im, - const std::vector &dependencies) = 0; + virtual void forward_ip_cc(descriptor_type& desc, sycl::buffer& inout) = 0; + virtual void forward_ip_rr(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) = 0; + virtual void forward_op_cc(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) = 0; + virtual void forward_op_rr(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, + sycl::buffer& out_re, + sycl::buffer& out_im) = 0; + + virtual sycl::event forward_ip_cc(descriptor_type& desc, fwd_type* inout, + const std::vector& dependencies) = 0; + virtual sycl::event forward_ip_rr(descriptor_type& desc, scalar_type* inout_re, + scalar_type* inout_im, + const std::vector& dependencies) = 0; + virtual sycl::event forward_op_cc(descriptor_type& desc, fwd_type* in, bwd_type* out, + const std::vector& dependencies) = 0; + virtual sycl::event forward_op_rr(descriptor_type& desc, scalar_type* in_re, scalar_type* in_im, + scalar_type* out_re, scalar_type* out_im, + const std::vector& dependencies) = 0; + + virtual void backward_ip_cc(descriptor_type& desc, sycl::buffer& inout) = 0; + virtual void backward_ip_rr(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) = 0; + virtual void backward_op_cc(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) = 0; + virtual void backward_op_rr(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, + sycl::buffer& out_re, + sycl::buffer& out_im) = 0; + + virtual sycl::event backward_ip_cc(descriptor_type& desc, fwd_type* inout, + const std::vector& dependencies) = 0; 
+ virtual sycl::event backward_ip_rr(descriptor_type& desc, scalar_type* inout_re, + scalar_type* inout_im, + const std::vector& dependencies) = 0; + virtual sycl::event backward_op_cc(descriptor_type& desc, bwd_type* in, fwd_type* out, + const std::vector& dependencies) = 0; + virtual sycl::event backward_op_rr(descriptor_type& desc, scalar_type* in_re, + scalar_type* in_im, scalar_type* out_re, scalar_type* out_im, + const std::vector& dependencies) = 0; /** For compute calls, throw errors for the external workspace as required. * @tparam ArgTs The non-descriptor arg(s) for the compute call. First one is used to check @@ -142,7 +142,7 @@ class commit_impl { * @param function_name The function name to user in generated exceptions. */ template - void compute_call_throw(const char *function_name) { + void compute_call_throw(const char* function_name) { external_workspace_helper_.template compute_call_throw(function_name); } @@ -151,14 +151,14 @@ class commit_impl { * @param function_name The function name to user in generated exceptions. * @param cgh The command group handler to associate the accessor with. */ - void add_buffer_workspace_dependency_if_rqd(const char *function_name, sycl::handler &cgh) { + void add_buffer_workspace_dependency_if_rqd(const char* function_name, sycl::handler& cgh) { external_workspace_helper_.add_buffer_dependency_if_rqd(function_name, cgh); } /** If WORKSPACE_EXTERNAL is set, depend on the last USM workspace event added via set_last_usm_workspace_event. * @param cgh The command group handler to associate the accessor with. */ - void depend_on_last_usm_workspace_event_if_rqd(sycl::handler &cgh) { + void depend_on_last_usm_workspace_event_if_rqd(sycl::handler& cgh) { external_workspace_helper_.depend_on_last_usm_workspace_event_if_rqd(cgh); } @@ -166,7 +166,7 @@ class commit_impl { * subsequent calls to depend_on_last_usm_workspace_event. * @param sycl_event The last usage of the USM workspace. 
*/ - void set_last_usm_workspace_event_if_rqd(sycl::event &sycl_event) { + void set_last_usm_workspace_event_if_rqd(sycl::event& sycl_event) { external_workspace_helper_.set_last_usm_workspace_event_if_rqd(sycl_event); } diff --git a/include/oneapi/mkl/dft/detail/dft_ct.hxx b/include/oneapi/mkl/dft/detail/dft_ct.hxx index 20cd537d8..7fc2921e4 100644 --- a/include/oneapi/mkl/dft/detail/dft_ct.hxx +++ b/include/oneapi/mkl/dft/detail/dft_ct.hxx @@ -20,8 +20,8 @@ // Commit template -ONEMKL_EXPORT dft::detail::commit_impl *create_commit( - const dft::detail::descriptor &desc, sycl::queue &sycl_queue); +ONEMKL_EXPORT dft::detail::commit_impl* create_commit( + const dft::detail::descriptor& desc, sycl::queue& sycl_queue); // BUFFER version @@ -34,105 +34,105 @@ using bwd = typename detail::descriptor_info::backward_type; //In-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout); +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im); +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im); //Out-of-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out); +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer, 1>& in, + sycl::buffer, 1>& out); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im); +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 
1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im); //USM version //In-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies); //Out-of-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies); // BUFFER version //In-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout); +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im); +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + 
sycl::buffer, 1>& inout_im); //Out-of-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out); +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im); +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im); //USM version //In-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies); //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies); //Out-of-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies); //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies); +ONEMKL_EXPORT sycl::event 
compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies); diff --git a/include/oneapi/mkl/dft/detail/types_impl.hpp b/include/oneapi/mkl/dft/detail/types_impl.hpp index 60eb922ab..5dad2302e 100644 --- a/include/oneapi/mkl/dft/detail/types_impl.hpp +++ b/include/oneapi/mkl/dft/detail/types_impl.hpp @@ -113,7 +113,7 @@ using valid_compute_arg = typename std::bool_constant< template constexpr bool valid_ip_realreal_impl = - is_complex_dft&& std::is_same_v, data_t>; + is_complex_dft && std::is_same_v, data_t>; // compute the range of a reinterpreted buffer template diff --git a/include/oneapi/mkl/dft/forward.hpp b/include/oneapi/mkl/dft/forward.hpp index e43c39ce0..0eeecd497 100644 --- a/include/oneapi/mkl/dft/forward.hpp +++ b/include/oneapi/mkl/dft/forward.hpp @@ -34,7 +34,7 @@ namespace oneapi::mkl::dft { //In-place transform template -void compute_forward(descriptor_type &desc, sycl::buffer &inout) { +void compute_forward(descriptor_type& desc, sycl::buffer& inout) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -47,8 +47,8 @@ void compute_forward(descriptor_type &desc, sycl::buffer &inout) { //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -void compute_forward(descriptor_type &desc, sycl::buffer &inout_re, - sycl::buffer &inout_im) { +void compute_forward(descriptor_type& desc, sycl::buffer& inout_re, + sycl::buffer& inout_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); @@ -62,8 +62,8 @@ void compute_forward(descriptor_type &desc, sycl::buffer &inout_re //Out-of-place transform template -void compute_forward(descriptor_type &desc, sycl::buffer &in, - sycl::buffer &out) { +void compute_forward(descriptor_type& desc, sycl::buffer& in, + sycl::buffer& out) { static_assert(detail::valid_compute_arg::value, "unexpected type for 
input_type"); static_assert(detail::valid_compute_arg::value, @@ -80,9 +80,9 @@ void compute_forward(descriptor_type &desc, sycl::buffer &in, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -void compute_forward(descriptor_type &desc, sycl::buffer &in_re, - sycl::buffer &in_im, sycl::buffer &out_re, - sycl::buffer &out_im) { +void compute_forward(descriptor_type& desc, sycl::buffer& in_re, + sycl::buffer& in_im, sycl::buffer& out_re, + sycl::buffer& out_im) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, @@ -105,56 +105,56 @@ void compute_forward(descriptor_type &desc, sycl::buffer &in_re, //In-place transform template -sycl::event compute_forward(descriptor_type &desc, data_type *inout, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, data_type* inout, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using fwd_type = typename detail::descriptor_info::forward_type; - return get_commit(desc)->forward_ip_cc(desc, reinterpret_cast(inout), dependencies); + return get_commit(desc)->forward_ip_cc(desc, reinterpret_cast(inout), dependencies); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template , bool> = true> -sycl::event compute_forward(descriptor_type &desc, data_type *inout_re, data_type *inout_im, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, data_type* inout_re, data_type* inout_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for data_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->forward_ip_rr(desc, reinterpret_cast(inout_re), - reinterpret_cast(inout_im), dependencies); + return 
get_commit(desc)->forward_ip_rr(desc, reinterpret_cast(inout_re), + reinterpret_cast(inout_im), dependencies); } //Out-of-place transform template -sycl::event compute_forward(descriptor_type &desc, input_type *in, output_type *out, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, input_type* in, output_type* out, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using fwd_type = typename detail::descriptor_info::forward_type; using bwd_type = typename detail::descriptor_info::backward_type; - return get_commit(desc)->forward_op_cc(desc, reinterpret_cast(in), - reinterpret_cast(out), dependencies); + return get_commit(desc)->forward_op_cc(desc, reinterpret_cast(in), + reinterpret_cast(out), dependencies); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -sycl::event compute_forward(descriptor_type &desc, input_type *in_re, input_type *in_im, - output_type *out_re, output_type *out_im, - const std::vector &dependencies = {}) { +sycl::event compute_forward(descriptor_type& desc, input_type* in_re, input_type* in_im, + output_type* out_re, output_type* out_im, + const std::vector& dependencies = {}) { static_assert(detail::valid_compute_arg::value, "unexpected type for input_type"); static_assert(detail::valid_compute_arg::value, "unexpected type for output_type"); using scalar_type = typename detail::descriptor_info::scalar_type; - return get_commit(desc)->forward_op_rr(desc, reinterpret_cast(in_re), - reinterpret_cast(in_im), - reinterpret_cast(out_re), - reinterpret_cast(out_im), dependencies); + return get_commit(desc)->forward_op_rr(desc, reinterpret_cast(in_re), + reinterpret_cast(in_im), + reinterpret_cast(out_re), + reinterpret_cast(out_im), dependencies); } } // namespace oneapi::mkl::dft diff --git 
a/include/oneapi/mkl/exceptions.hpp b/include/oneapi/mkl/exceptions.hpp index 244c8c61d..8047f7676 100644 --- a/include/oneapi/mkl/exceptions.hpp +++ b/include/oneapi/mkl/exceptions.hpp @@ -38,7 +38,7 @@ class exception : public std::exception { std::string msg_; public: - exception(const std::string &domain, const std::string &function, const std::string &info = "") + exception(const std::string& domain, const std::string& function, const std::string& info = "") : std::exception() { msg_ = std::string("oneMKL: ") + domain + ((domain.length() != 0 && function.length() != 0) ? "/" : "") + function + @@ -47,15 +47,15 @@ class exception : public std::exception { : ""); } - const char *what() const noexcept override { + const char* what() const noexcept override { return msg_.c_str(); } }; class unsupported_device : public oneapi::mkl::exception { public: - unsupported_device(const std::string &domain, const std::string &function, - const sycl::device &device) + unsupported_device(const std::string& domain, const std::string& function, + const sycl::device& device) : oneapi::mkl::exception( domain, function, device.get_info() + " is not supported") {} @@ -63,14 +63,14 @@ class unsupported_device : public oneapi::mkl::exception { class host_bad_alloc : public oneapi::mkl::exception { public: - host_bad_alloc(const std::string &domain, const std::string &function) + host_bad_alloc(const std::string& domain, const std::string& function) : oneapi::mkl::exception(domain, function, "cannot allocate memory on host") {} }; class device_bad_alloc : public oneapi::mkl::exception { public: - device_bad_alloc(const std::string &domain, const std::string &function, - const sycl::device &device) + device_bad_alloc(const std::string& domain, const std::string& function, + const sycl::device& device) : oneapi::mkl::exception( domain, function, "cannot allocate memory on " + device.get_info()) {} @@ -78,30 +78,30 @@ class device_bad_alloc : public oneapi::mkl::exception { class 
unimplemented : public oneapi::mkl::exception { public: - unimplemented(const std::string &domain, const std::string &function, - const std::string &info = "") + unimplemented(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception(domain, function, "function is not implemented " + info) {} }; class invalid_argument : public oneapi::mkl::exception { public: - invalid_argument(const std::string &domain, const std::string &function, - const std::string &info = "") + invalid_argument(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception(domain, function, "invalid argument " + info) {} }; class uninitialized : public oneapi::mkl::exception { public: - uninitialized(const std::string &domain, const std::string &function, - const std::string &info = "") + uninitialized(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception(domain, function, "handle/descriptor is not initialized " + info) {} }; class computation_error : public oneapi::mkl::exception { public: - computation_error(const std::string &domain, const std::string &function, - const std::string &info = "") + computation_error(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception( domain, function, "computation error" + ((info.length() != 0) ? (": " + info) : "")) {} @@ -109,16 +109,16 @@ class computation_error : public oneapi::mkl::exception { class batch_error : public oneapi::mkl::exception { public: - batch_error(const std::string &domain, const std::string &function, - const std::string &info = "") + batch_error(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception(domain, function, "batch error" + ((info.length() != 0) ? 
(": " + info) : "")) {} }; class library_not_found : public oneapi::mkl::exception { public: - library_not_found(const std::string &domain, const std::string &function, - const std::string &info = "") + library_not_found(const std::string& domain, const std::string& function, + const std::string& info = "") : oneapi::mkl::exception( domain, function, "library not found" + ((info.length() != 0) ? (": " + info) : "")) {} diff --git a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx b/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx index cd1d76765..0b1d58ba1 100644 --- a/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx +++ b/include/oneapi/mkl/lapack/detail/cusolver/lapack_ct.hxx @@ -20,760 +20,760 @@ // Buffer APIs static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - 
sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void 
getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer 
&a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, 
sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, 
sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, 
sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, 
m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, 
oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, 
scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - 
std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, 
oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void 
ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t 
ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - 
std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, @@ -781,10 +781,10 @@ static inline void getri_batch(backend_selector selector, std } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, 
std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -792,10 +792,10 @@ static inline void getrs_batch(backend_selector selector, } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -803,11 +803,11 @@ static inline void getrs_batch(backend_selector selector, } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> 
&scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -815,126 +815,126 @@ static inline void getrs_batch(backend_selector selector, } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void orgqr_batch(backend_selector selector, 
std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } 
static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + 
std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, @@ -942,31 +942,31 @@ static inline void potrs_batch(backend_selector selector, one } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); @@ -975,350 +975,350 @@ static inline void ungqr_batch(backend_selector selector, std // USM APIs static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, 
taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, 
std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, 
std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, + double* s, 
std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, 
tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } 
static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1326,233 +1326,233 @@ static inline sycl::event ormtr(backend_selector selector, static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormrq(backend_selector 
selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, 
ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, 
+ std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + 
oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = 
{}) { return oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); @@ -1560,19 +1560,19 @@ static inline sycl::event trtrs(backend_selector selector, static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); @@ -1580,71 +1580,71 @@ static inline sycl::event trtrs(backend_selector selector, static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + 
std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr(selector.get_queue(), m, n, k, a, lda, 
tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1652,11 +1652,11 @@ static inline sycl::event 
unmrq(backend_selector selector, static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1664,10 +1664,10 @@ static inline sycl::event unmrq(backend_selector selector, static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1675,11 +1675,11 @@ static inline sycl::event unmqr(backend_selector selector, static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1687,10 +1687,10 @@ static inline sycl::event unmqr(backend_selector selector, static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1698,592 +1698,592 @@ static inline sycl::event unmtr(backend_selector selector, static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t 
stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, 
std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, 
group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + 
std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t 
*group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline 
sycl::event getri_batch(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
getri_batch(backend_selector selector, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) 
{ return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, 
+ std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t 
lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + backend_selector selector, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static 
inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies 
= {}) { return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - 
oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event 
potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, 
stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, 
std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, 
dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* 
group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::cusolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); @@ -2566,62 +2566,62 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector sel } template std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template 
std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo 
*uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::cusolver::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx b/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx index ffa9c3007..f0de843fe 100644 --- a/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx +++ b/include/oneapi/mkl/lapack/detail/cusolver/onemkl_lapack_cusolver.hxx @@ -19,1812 +19,1812 @@ // Buffer APIs -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, 
+ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + 
sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& 
ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - 
sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo 
uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& 
scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - 
sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose 
trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, 
+ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void 
sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); 
-ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void 
ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + 
sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, 
std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + 
std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer 
&scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); 
-ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); // USM APIs -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + 
float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT 
sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event 
getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
-ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, 
std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, + double* a, std::int64_t lda, double* 
s, double* u, std::int64_t ldu, + double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex 
*scratchpad, +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, 
std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, double* a, std::int64_t lda, double* 
tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue 
&queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - 
std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT 
sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t 
itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies 
= {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, 
- std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, 
std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT 
sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t 
lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event 
geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, 
+ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event 
getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, 
std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, 
std::complex *b, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT 
sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + 
std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT 
sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT 
sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, 
std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event 
ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SCRATCHPAD APIs template -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t 
getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT 
std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, 
std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, 
+ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, 
std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, 
std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); diff --git a/include/oneapi/mkl/lapack/detail/lapack_loader.hpp b/include/oneapi/mkl/lapack/detail/lapack_loader.hpp index 2bb49364e..fc5b3d70e 100644 --- a/include/oneapi/mkl/lapack/detail/lapack_loader.hpp +++ b/include/oneapi/mkl/lapack/detail/lapack_loader.hpp @@ -38,2344 +38,2344 @@ namespace mkl { namespace lapack { namespace detail { -ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> 
&scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void 
gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void 
geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void 
getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t 
nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + 
std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - 
std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, 
oneapi::mkl::side side, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void 
potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, 
- oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, 
sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sygvd(oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> 
&scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); 
+ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, 
+ sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, +ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, +ONEMKL_EXPORT void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + 
sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT void 
unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t 
stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, +ONEMKL_EXPORT void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + 
sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, 
std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer> &a, +ONEMKL_EXPORT void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, +ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t 
stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - 
sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + 
sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +ONEMKL_EXPORT void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, +ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float 
*d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, +ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, 
std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + 
std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t 
lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + 
std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, 
std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, 
sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t 
k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, + double* a, std::int64_t lda, double* tau, double* 
c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT 
sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, + double* a, std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, + float* a, std::int64_t lda, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = 
{}); -ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t 
nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, - 
oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, 
- std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, 
std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + 
std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT 
sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); 
-ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t 
stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - 
std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event 
getrs_batch( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event getrs_batch( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event getrs_batch( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo uplo, 
std::int64_t n, double *a, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const 
std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + 
oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); ONEMKL_EXPORT sycl::event ungqr_batch( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::complex *tau, - std::int64_t stride_tau, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, 
std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t heevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t hegvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t hetrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t hetrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t 
orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t 
lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ungbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> 
-std::int64_t ungtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ungtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t unmrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t unmqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t unmtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t 
getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t 
getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t 
orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t ungqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t 
gesvd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t 
getri_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, 
oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t 
lda); template <> ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> 
ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT 
std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t potrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t potrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + 
sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::job jobz, + sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::job jobz, + sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(oneapi::mkl::device 
libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, 
std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, 
std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, 
std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT 
std::int64_t geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t 
potrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t 
getri_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, 
std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t 
geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> 
ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& 
queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes); + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes); } //namespace detail } 
//namespace lapack } //namespace mkl diff --git a/include/oneapi/mkl/lapack/detail/lapack_rt.hpp b/include/oneapi/mkl/lapack/detail/lapack_rt.hpp index a96efe8d1..5199a0ce5 100644 --- a/include/oneapi/mkl/lapack/detail/lapack_rt.hpp +++ b/include/oneapi/mkl/lapack/detail/lapack_rt.hpp @@ -38,2132 +38,2132 @@ namespace oneapi { namespace mkl { namespace lapack { -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& 
scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +static inline void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + 
sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrf(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& 
ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, 
scratchpad, scratchpad_size); } -static inline void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +static inline void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +static inline void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -static inline void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +static inline void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +static inline void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +static inline void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void hetrd(sycl::queue &queue, oneapi::mkl::uplo 
uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, 
scratchpad_size); } -static inline void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +static inline void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +static inline void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, 
std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +static inline void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer> &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +static inline void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, 
std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +static inline void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -static inline void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, 
scratchpad, scratchpad_size); } -static inline void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +static inline void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -static inline void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer 
&a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void sytrf(sycl::queue &queue, oneapi::mkl::uplo 
uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +static inline void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer 
&scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +static inline void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -static inline void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static 
inline void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +static inline void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -static inline void unmrq(sycl::queue &queue, 
oneapi::mkl::side side, oneapi::mkl::transpose trans, +static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +static inline void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmqr(sycl::queue &queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, +static inline void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, 
std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void 
geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, 
std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, 
batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void 
getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +static inline void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -static inline void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void 
potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +static inline void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size) { detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +static inline void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -static inline void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +static inline void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - 
sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gebrd(get_device_id(queue), queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gerqf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, 
double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf(get_device_id(queue), queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue 
&queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf(get_device_id(queue), queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri(get_device_id(queue), queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, 
std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, +static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, +static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs(sycl::queue 
&queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, + double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu, + double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + 
std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +static inline sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::gesvd(get_device_id(queue), queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, +static inline sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::heevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, +static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, 
std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, +static inline sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hegvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::hetrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::hetrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) 
{ +static inline sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, +static inline sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, 
tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, +static inline sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormtr(get_device_id(queue), queue, side, uplo, trans, m, 
n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, 
double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ormqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const 
std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potri(get_device_id(queue), queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, 
dependencies); } -static inline sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::syevd(get_device_id(queue), queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, +static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, +static inline sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sygvd(get_device_id(queue), queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrd(get_device_id(queue), queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo 
uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, 
std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::sytrf(get_device_id(queue), queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, 
float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +static inline sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::trtrs(get_device_id(queue), queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t 
m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungbr(get_device_id(queue), queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr(get_device_id(queue), queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +static inline sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungtr(get_device_id(queue), queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::unmrq(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmqr(get_device_id(queue), queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +static inline sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::unmqr(get_device_id(queue), queue, side, trans, 
m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +static inline sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::unmtr(get_device_id(queue), queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event 
geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::geqrf_batch(get_device_id(queue), queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { 
return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t 
stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, 
std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrf_batch(get_device_id(queue), queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } 
-static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, 
lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getri_batch(get_device_id(queue), queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - 
std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, 
+static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + 
std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::getrs_batch(get_device_id(queue), queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t 
*group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::orgqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo 
*uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, 
dependencies); } -static inline sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, +static inline sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrf_batch(get_device_id(queue), queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event 
potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, 
std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::potrs_batch(get_device_id(queue), queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, 
batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -static inline sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** 
tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return detail::ungqr_batch(get_device_id(queue), queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::gebrd_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::gerqf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::geqrf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { return detail::gesvd_scratchpad_size(get_device_id(queue), queue, jobu, jobvt, m, n, lda, ldu, ldvt); } template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { return 
detail::gesvd_scratchpad_size(get_device_id(queue), queue, jobu, jobvt, m, n, lda, ldu, ldvt); } template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return detail::getrf_scratchpad_size(get_device_id(queue), queue, m, n, lda); } template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return detail::getri_scratchpad_size(get_device_id(queue), queue, n, lda); } template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return detail::getrs_scratchpad_size(get_device_id(queue), queue, trans, n, nrhs, lda, ldb); } template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::heevd_scratchpad_size(get_device_id(queue), queue, jobz, uplo, n, lda); } template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { return detail::hegvd_scratchpad_size(get_device_id(queue), queue, itype, jobz, uplo, n, lda, ldb); } template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t 
lda) { return detail::hetrd_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::hetrf_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::orgbr_scratchpad_size(get_device_id(queue), queue, vect, m, n, k, lda); } template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::orgtr_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::orgqr_scratchpad_size(get_device_id(queue), queue, m, n, k, lda); } template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::ormrq_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, lda, ldc); } template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose 
trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::ormqr_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, @@ -2171,7 +2171,7 @@ std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, } template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2179,45 +2179,45 @@ std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, n, lda, ldc); } template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::potrf_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return detail::potrs_scratchpad_size(get_device_id(queue), queue, uplo, n, nrhs, lda, ldb); } template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::potri_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::sytrf_scratchpad_size(get_device_id(queue), queue, 
uplo, n, lda); } template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::syevd_scratchpad_size(get_device_id(queue), queue, jobz, uplo, n, lda); } template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { return detail::sygvd_scratchpad_size(get_device_id(queue), queue, itype, jobz, uplo, n, lda, ldb); } template = nullptr> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::sytrd_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2226,31 +2226,31 @@ std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, } template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::ungbr_scratchpad_size(get_device_id(queue), queue, vect, m, n, k, lda); } template = nullptr> -std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, 
std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return detail::ungqr_scratchpad_size(get_device_id(queue), queue, m, n, k, lda); } template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return detail::ungtr_scratchpad_size(get_device_id(queue), queue, uplo, n, lda); } template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::unmrq_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, lda, ldc); } template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { return detail::unmqr_scratchpad_size(get_device_id(queue), queue, side, trans, m, n, k, @@ -2258,7 +2258,7 @@ std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, } template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2266,21 +2266,21 @@ std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, n, lda, ldc); } template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, 
std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return detail::getrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return detail::getri_batch_scratchpad_size(get_device_id(queue), queue, n, lda, stride_a, stride_ipiv, batch_size); } template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, @@ -2290,21 +2290,21 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transp batch_size); } template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::geqrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, stride_a, stride_tau, batch_size); } template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return detail::potrf_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, lda, stride_a, batch_size); } template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, 
std::int64_t n, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { @@ -2313,7 +2313,7 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo u } template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::orgqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, @@ -2321,68 +2321,68 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std } template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return detail::ungqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::getrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t 
getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::getri_batch_scratchpad_size(get_device_id(queue), queue, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::getrs_batch_scratchpad_size(get_device_id(queue), queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::geqrf_batch_scratchpad_size(get_device_id(queue), queue, m, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::orgqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, group_count, group_sizes); } template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - 
std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::potrf_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, lda, group_count, group_sizes); } template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return detail::potrs_batch_scratchpad_size(get_device_id(queue), queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return detail::ungqr_batch_scratchpad_size(get_device_id(queue), queue, m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx b/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx index 1ebe97527..f952eb3fd 100644 --- a/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx +++ b/include/oneapi/mkl/lapack/detail/mkl_common/lapack_ct.hxx @@ -18,765 +18,765 @@ *******************************************************************************/ static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, 
sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + 
std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, 
std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static 
inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, 
std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, 
sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, 
scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } 
static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + 
std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static 
inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, 
scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { 
+ std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, 
oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, 
oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, 
sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> 
&a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& 
c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, 
scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t 
stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, @@ -784,10 +784,10 @@ static inline void getri_batch(backend_selector selecto } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -795,10 +795,10 @@ static inline void getrs_batch(backend_selector selecto } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -806,11 +806,11 @@ static inline void getrs_batch(backend_selector selecto } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, @@ -818,111 +818,111 @@ static inline void getrs_batch(backend_selector selecto } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - 
std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + 
sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch( selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch( 
selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, @@ -930,9 +930,9 @@ static inline void potrs_batch(backend_selector selecto } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, @@ -940,10 +940,10 @@ static inline void potrs_batch(backend_selector selecto } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, @@ -951,21 +951,21 @@ static inline void potrs_batch(backend_selector selecto } static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, @@ -973,370 +973,370 @@ static inline void ungqr_batch(backend_selector selecto } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, 
scratchpad, scratchpad_size); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex 
*tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, 
std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, 
ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1344,243 +1344,243 @@ static inline sycl::event ormtr(backend_selector select static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } 
static inline sycl::event ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, 
std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline 
sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, + double* a, std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - 
float *a, std::int64_t lda, float *w, float *scratchpad, + float* a, std::int64_t lda, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const 
std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, 
std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); @@ -1588,19 +1588,19 @@ static inline sycl::event trtrs(backend_selector select static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, 
std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); @@ -1608,71 +1608,71 @@ static inline sycl::event trtrs(backend_selector select static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - 
std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungbr( selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, 
n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1680,11 +1680,11 @@ static inline sycl::event unmrq(backend_selector select static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1692,10 +1692,10 @@ static inline sycl::event unmrq(backend_selector select static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1703,11 +1703,11 @@ static inline sycl::event unmqr(backend_selector select static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, 
std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1715,10 +1715,10 @@ static inline sycl::event unmqr(backend_selector select static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); @@ -1726,607 +1726,607 @@ static inline sycl::event unmtr(backend_selector select static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector 
selector, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch( selector.get_queue(), m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, 
group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex 
*scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch( selector.get_queue(), m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, double *a, std::int64_t lda, - 
std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch( selector.get_queue(), n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, float **a, 
std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, 
std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch(selector.get_queue(), n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline 
sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t 
*group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + backend_selector selector, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + 
float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a, + 
std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, 
dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch(selector.get_queue(), uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, 
lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, 
std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch( selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + 
std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch( selector.get_queue(), m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); @@ -2631,64 +2631,64 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector = nullptr> std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template = nullptr> std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, 
std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template = nullptr> std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template = nullptr> std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template = nullptr> std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - 
std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::LAPACK_BACKEND::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git a/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx b/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx index 372e2646b..bbc6079b7 100644 --- a/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx +++ b/include/oneapi/mkl/lapack/detail/mkl_common/onemkl_lapack_backends.hxx @@ -17,2123 +17,2123 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - 
std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& 
d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT 
void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + 
std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - 
sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& 
scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + 
sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); 
-ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t 
scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); 
-ONEMKL_EXPORT void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t 
scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); 
+ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, 
sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); 
-ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); 
+ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + 
sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, 
std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& 
a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t 
batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t 
stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, 
std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - 
double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float 
*a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, + double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu, + double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue 
&queue, oneapi::mkl::jobsvd jobu, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, + 
const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, + const std::vector& 
dependencies = {}); +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const 
std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo 
uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT 
sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, 
std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t 
n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, 
- double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, + const std::vector& 
dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, 
std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t k, 
std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, 
std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, 
std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, 
std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t 
stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event 
getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t 
stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t 
**ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, 
std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t 
stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + 
std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, 
oneapi::mkl::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + 
std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = 
{}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event 
ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, 
std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda); +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& 
queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t 
ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> 
-std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, 
oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t 
ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, 
std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT 
std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t 
n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> -ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template <> -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t 
getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, 
std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t getrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size>( - sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size>( - sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t 
lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t 
potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t potrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t potrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t 
potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t 
sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> -ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> 
ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template <> ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> -ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template <> ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template <> -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t 
getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, 
std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t 
geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::uplo 
uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t 
ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template <> -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> 
-ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *n, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, 
std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t 
group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, - oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, - oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, + oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - 
sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, 
oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template <> ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); diff --git a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx b/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx index 774441409..1ba7533c1 100644 --- a/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx +++ b/include/oneapi/mkl/lapack/detail/rocsolver/lapack_ct.hxx @@ -22,2293 +22,2315 @@ // Buffer APIs static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void 
gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector 
selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getrf(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void getri(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); -} -static inline void getrs(backend_selector selector, oneapi::mkl::transpose trans, - std::int64_t 
n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, - scratchpad, scratchpad_size); + scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size); } static inline 
void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, - std::int64_t ldvt, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { - oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, - std::int64_t ldu, sycl::buffer> &vt, - std::int64_t ldvt, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, - vt, ldvt, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void heevd(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - 
oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void hetrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgbr(backend_selector selector, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void orgtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + 
oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - 
oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, 
a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t 
scratchpad_size) { oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potri(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, 
sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void potrs(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size); } static inline void syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer &scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, - scratchpad_size); + oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + 
scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, - w, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrd(backend_selector selector, 
oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void sytrf(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer &ipiv, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, - b, ldb, scratchpad, scratchpad_size); + b, ldb, scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size); + scratchpad, scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& 
a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void ungtr(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, - scratchpad_size); + scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& 
c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, 
std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, - ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, - c, ldc, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, 
lda, + tau, c, ldc, scratchpad, scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void 
geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t 
stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getri_batch(backend_selector selector, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, 
oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, 
sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - stride_a, ipiv, stride_ipiv, b, ldb, stride_b, - batch_size, scratchpad, scratchpad_size); + stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t 
stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, sycl::buffer> &a, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, + sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, - stride_ipiv, batch_size, scratchpad, - scratchpad_size); + stride_ipiv, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, + std::int64_t n, 
std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrf_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, 
oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, +static inline void potrf_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, - batch_size, scratchpad, scratchpad_size); + batch_size, scratchpad, scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +static inline void potrs_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, 
stride_b, batch_size, scratchpad, - scratchpad_size); -} -static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } -static inline void potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - std::int64_t 
stride_a, sycl::buffer> &b, +static inline void potrs_batch(backend_selector selector, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, scratchpad, - scratchpad_size); + stride_a, b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, + sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } static inline void ungqr_batch(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { - oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, - stride_tau, batch_size, scratchpad, scratchpad_size); + 
oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size); } // USM APIs static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, - float *e, std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, - double *tauq, double *taup, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, - float *tauq, float *taup, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, 
e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gebrd(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, - taup, scratchpad, scratchpad_size, dependencies); + taup, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event gerqf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::gerqf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return 
oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event geqrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, - scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return 
oneapi::mkl::lapack::rocsolver::geqrf(selector.get_queue(), m, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrf(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t 
*ipiv, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf(selector.get_queue(), m, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getri(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event 
getri(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, 
oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, - b, ldb, scratchpad, scratchpad_size, dependencies); + b, ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, 
scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::int64_t n, std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event gesvd(backend_selector selector, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}) { + std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, - u, ldu, vt, ldvt, scratchpad, scratchpad_size, - dependencies); + u, ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); } -static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(backend_selector selector, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event heevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event heevd(backend_selector selector, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event 
hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event hetrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, 
dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* tau, 
float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, 
std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ormqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ormqr(selector.get_queue(), 
side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, 
std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, 
dependencies); } static inline sycl::event potri(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, - scratchpad_size, dependencies); + scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, 
scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event syevd(backend_selector selector, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}) { return oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event syevd(backend_selector selector, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { +static inline sycl::event syevd(backend_selector selector, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, - b, ldb, w, scratchpad, scratchpad_size, - dependencies); + b, ldb, w, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tau, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrd(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tau, float *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event sytrf(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, - scratchpad, scratchpad_size, dependencies); + 
scratchpad, scratchpad_size, dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t 
lda, float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event trtrs(backend_selector selector, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, - lda, b, ldb, scratchpad, scratchpad_size, - dependencies); + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, + a, lda, b, ldb, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event ungbr(backend_selector selector, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungbr(backend_selector selector, 
oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr(selector.get_queue(), m, n, k, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungtr(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungtr(selector.get_queue(), uplo, n, a, lda, tau, - scratchpad, scratchpad_size, dependencies); + scratchpad, scratchpad_size, dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmrq(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmqr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmqr(selector.get_queue(), side, trans, m, n, k, 
a, lda, - tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event unmtr(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, - lda, tau, c, ldc, scratchpad, scratchpad_size, - dependencies); + lda, tau, c, ldc, scratchpad, scratchpad_size, + dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, 
std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline 
sycl::event geqrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - tau, stride_tau, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { - return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, 
std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, 
std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::mkl::lapack::rocsolver::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + 
std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getrf_batch(backend_selector selector, std::int64_t *m, - std::int64_t 
*n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, stride_a, - ipiv, stride_ipiv, batch_size, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + 
ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return 
oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event getri_batch(backend_selector selector, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +static inline sycl::event getri_batch(backend_selector selector, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getri_batch(selector.get_queue(), n, a, lda, ipiv, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - 
std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, 
std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch( selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, - std::int64_t **ipiv, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, - std::int64_t **ipiv, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, 
double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event getrs_batch( - backend_selector selector, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + backend_selector selector, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, 
std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, - ipiv, b, ldb, group_count, group_sizes, - scratchpad, scratchpad_size, dependencies); + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - 
scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } -static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +static inline sycl::event orgqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, 
std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, 
- std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - stride_a, batch_size, scratchpad, - scratchpad_size, dependencies); + stride_a, batch_size, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - 
std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrf_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrf_batch(selector.get_queue(), uplo, n, a, lda, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, 
stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, + std::complex* a, std::int64_t lda, + 
std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - stride_a, b, ldb, stride_b, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event potrs_batch(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, - b, ldb, 
group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + b, ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); } static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, - stride_a, tau, stride_tau, batch_size, - scratchpad, scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies 
= {}) { + stride_a, tau, stride_tau, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); -} -static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return oneapi::mkl::lapack::rocsolver::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, - group_count, group_sizes, scratchpad, - scratchpad_size, dependencies); + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); } // SCRATCHPAD APIs template std::int64_t gebrd_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::mkl::lapack::rocsolver::gebrd_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gerqf_scratchpad_size(backend_selector selector, std::int64_t m, 
std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::mkl::lapack::rocsolver::gerqf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t geqrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::mkl::lapack::rocsolver::geqrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t gesvd_scratchpad_size(backend_selector selector, @@ -2321,82 +2343,83 @@ std::int64_t gesvd_scratchpad_size(backend_selector selector template std::int64_t getrf_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size(selector.get_queue(), m, n, - lda); + return oneapi::mkl::lapack::rocsolver::getrf_scratchpad_size(selector.get_queue(), m, + n, lda); } template std::int64_t getri_scratchpad_size(backend_selector selector, std::int64_t n, std::int64_t lda) { return oneapi::mkl::lapack::rocsolver::getri_scratchpad_size(selector.get_queue(), n, - lda); + lda); } template std::int64_t getrs_scratchpad_size(backend_selector selector, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return oneapi::mkl::lapack::rocsolver::getrs_scratchpad_size(selector.get_queue(), - trans, n, nrhs, lda, ldb); + trans, n, nrhs, lda, ldb); } template std::int64_t heevd_scratchpad_size(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::heevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::mkl::lapack::rocsolver::heevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template -std::int64_t hegvd_scratchpad_size(backend_selector 
selector, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb) { +std::int64_t hegvd_scratchpad_size(backend_selector selector, + std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb) { return oneapi::mkl::lapack::rocsolver::hegvd_scratchpad_size( selector.get_queue(), itype, jobz, uplo, n, lda, ldb); } template std::int64_t hetrd_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::hetrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::hetrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t hetrf_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::hetrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::hetrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgbr_scratchpad_size(backend_selector selector, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::orgbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::mkl::lapack::rocsolver::orgbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t orgtr_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::orgtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::orgtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t orgqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return 
oneapi::mkl::lapack::rocsolver::orgqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return oneapi::mkl::lapack::rocsolver::orgqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ormrq_scratchpad_size(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::mkl::lapack::rocsolver::ormrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::mkl::lapack::rocsolver::ormrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t ormqr_scratchpad_size(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::mkl::lapack::rocsolver::ormqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::mkl::lapack::rocsolver::ormqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t ormtr_scratchpad_size(backend_selector selector, @@ -2409,47 +2432,48 @@ std::int64_t ormtr_scratchpad_size(backend_selector selector template std::int64_t potrf_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::potrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t potrs_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { - return oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size(selector.get_queue(), uplo, - n, nrhs, lda, ldb); + return oneapi::mkl::lapack::rocsolver::potrs_scratchpad_size(selector.get_queue(), + uplo, n, nrhs, lda, ldb); } 
template std::int64_t potri_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::potri_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::potri_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t sytrf_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::sytrf_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t syevd_scratchpad_size(backend_selector selector, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::syevd_scratchpad_size(selector.get_queue(), jobz, - uplo, n, lda); + return oneapi::mkl::lapack::rocsolver::syevd_scratchpad_size(selector.get_queue(), + jobz, uplo, n, lda); } template -std::int64_t sygvd_scratchpad_size(backend_selector selector, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t lda, std::int64_t ldb) { +std::int64_t sygvd_scratchpad_size(backend_selector selector, + std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb) { return oneapi::mkl::lapack::rocsolver::sygvd_scratchpad_size( selector.get_queue(), itype, jobz, uplo, n, lda, ldb); } template std::int64_t sytrd_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::sytrd_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::sytrd_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t trtrs_scratchpad_size(backend_selector selector, @@ -2463,36 +2487,36 @@ template 
std::int64_t ungbr_scratchpad_size(backend_selector selector, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::ungbr_scratchpad_size(selector.get_queue(), vect, - m, n, k, lda); + return oneapi::mkl::lapack::rocsolver::ungbr_scratchpad_size(selector.get_queue(), + vect, m, n, k, lda); } template std::int64_t ungqr_scratchpad_size(backend_selector selector, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::ungqr_scratchpad_size(selector.get_queue(), m, n, - k, lda); + return oneapi::mkl::lapack::rocsolver::ungqr_scratchpad_size(selector.get_queue(), m, + n, k, lda); } template std::int64_t ungtr_scratchpad_size(backend_selector selector, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { - return oneapi::mkl::lapack::rocsolver::ungtr_scratchpad_size(selector.get_queue(), uplo, - n, lda); + return oneapi::mkl::lapack::rocsolver::ungtr_scratchpad_size(selector.get_queue(), + uplo, n, lda); } template std::int64_t unmrq_scratchpad_size(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::mkl::lapack::rocsolver::unmrq_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::mkl::lapack::rocsolver::unmrq_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t unmqr_scratchpad_size(backend_selector selector, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { - return oneapi::mkl::lapack::rocsolver::unmqr_scratchpad_size(selector.get_queue(), side, - trans, m, n, k, lda, ldc); + return oneapi::mkl::lapack::rocsolver::unmqr_scratchpad_size( + selector.get_queue(), side, trans, m, n, k, lda, ldc); } template std::int64_t 
unmtr_scratchpad_size(backend_selector selector, @@ -2568,62 +2592,62 @@ std::int64_t ungqr_batch_scratchpad_size(backend_selector se } template std::int64_t getrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::getrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t getri_batch_scratchpad_size(backend_selector selector, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::getri_batch_scratchpad_size( selector.get_queue(), n, lda, group_count, group_sizes); } template std::int64_t getrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::getrs_batch_scratchpad_size( selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::geqrf_batch_scratchpad_size( selector.get_queue(), m, n, lda, group_count, group_sizes); } template std::int64_t 
orgqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::orgqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } template std::int64_t potrf_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::potrf_batch_scratchpad_size( selector.get_queue(), uplo, n, lda, group_count, group_sizes); } template std::int64_t potrs_batch_scratchpad_size(backend_selector selector, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::potrs_batch_scratchpad_size( selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return oneapi::mkl::lapack::rocsolver::ungqr_batch_scratchpad_size( selector.get_queue(), m, n, k, lda, group_count, group_sizes); } diff --git 
a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx b/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx index c68009e54..3b205f606 100644 --- a/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx +++ b/include/oneapi/mkl/lapack/detail/rocsolver/onemkl_lapack_rocsolver.hxx @@ -21,1815 +21,1812 @@ // Buffer APIs -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, - sycl::buffer &taup, sycl::buffer &scratchpad, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> 
&tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, 
+ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, 
+ONEMKL_EXPORT void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void getrs(sycl::queue& queue, 
oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + 
sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, +ONEMKL_EXPORT void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, 
std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +ONEMKL_EXPORT void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, 
- sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void 
orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, 
std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, +ONEMKL_EXPORT void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, 
sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &scratchpad, 
std::int64_t scratchpad_size); +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - 
sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + 
sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& 
b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void 
ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, +ONEMKL_EXPORT void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, 
std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, +ONEMKL_EXPORT void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - 
sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void 
geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, 
- sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getri_batch(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getri_batch(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, 
std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, +ONEMKL_EXPORT void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void 
orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, +ONEMKL_EXPORT void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); + sycl::buffer& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, +ONEMKL_EXPORT void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, 
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -ONEMKL_EXPORT void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +ONEMKL_EXPORT void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); // USM APIs -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT 
sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, 
double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); 
-ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, 
float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t 
n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, - double *b, std::int64_t ldb, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, std::int64_t ldu, - double *vt, std::int64_t ldvt, double *scratchpad, + double* a, std::int64_t lda, double* s, double* u, std::int64_t ldu, + double* vt, std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event 
gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event heevd(sycl::queue& queue, 
oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT 
sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event hetrf(sycl::queue& 
queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double 
*tau, double *scratchpad, +ONEMKL_EXPORT sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, 
std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, 
double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *w, double *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t 
n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *w, float *scratchpad, +ONEMKL_EXPORT sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* 
e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT 
sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t 
n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* 
a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, 
oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, 
std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - 
float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, 
std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
-ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + 
+ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
-ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - 
std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + 
float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, + 
std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, 
- std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + 
float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, 
oneapi::mkl::uplo* uplo, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue 
&queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, 
std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t 
*group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); - -ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +ONEMKL_EXPORT sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex 
*tau, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +ONEMKL_EXPORT sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SCRATCHPAD APIs template -ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, 
std::int64_t n, +ONEMKL_EXPORT std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +ONEMKL_EXPORT std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template -ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +ONEMKL_EXPORT std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t 
hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +ONEMKL_EXPORT std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, 
oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +ONEMKL_EXPORT std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +ONEMKL_EXPORT std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t 
trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template -ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +ONEMKL_EXPORT std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +ONEMKL_EXPORT std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template -ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +ONEMKL_EXPORT std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, 
+ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, - oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, - std::int64_t lda, std::int64_t stride_a, - std::int64_t stride_ipiv, std::int64_t ldb, - std::int64_t stride_b, - std::int64_t batch_size); +ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size); template -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, +ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template -ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template ONEMKL_EXPORT std::int64_t getrs_batch_scratchpad_size( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t geqrf_batch_scratchpad_size(sycl::queue& 
queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +ONEMKL_EXPORT std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +ONEMKL_EXPORT std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); template -ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +ONEMKL_EXPORT std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); diff --git a/include/oneapi/mkl/lapack/exceptions.hpp b/include/oneapi/mkl/lapack/exceptions.hpp index da205cc1a..59de3b4de 100644 --- a/include/oneapi/mkl/lapack/exceptions.hpp +++ 
b/include/oneapi/mkl/lapack/exceptions.hpp @@ -25,7 +25,7 @@ namespace lapack { class exception { public: - exception(oneapi::mkl::exception *_ex, std::int64_t info, std::int64_t detail = 0) + exception(oneapi::mkl::exception* _ex, std::int64_t info, std::int64_t detail = 0) : _info(info), _detail(detail), _ex(_ex) {} @@ -35,20 +35,20 @@ class exception { std::int64_t detail() const { return _detail; } - const char *what() const { + const char* what() const { return _ex->what(); } private: std::int64_t _info; std::int64_t _detail; - mkl::exception *_ex; + mkl::exception* _ex; }; class computation_error : public oneapi::mkl::computation_error, public oneapi::mkl::lapack::exception { public: - computation_error(const std::string &function, const std::string &info, std::int64_t code) + computation_error(const std::string& function, const std::string& info, std::int64_t code) : oneapi::mkl::computation_error("LAPACK", function, info), oneapi::mkl::lapack::exception(this, code) {} using oneapi::mkl::computation_error::what; @@ -56,17 +56,17 @@ class computation_error : public oneapi::mkl::computation_error, class batch_error : public oneapi::mkl::batch_error, public oneapi::mkl::lapack::exception { public: - batch_error(const std::string &function, const std::string &info, std::int64_t num_errors, + batch_error(const std::string& function, const std::string& info, std::int64_t num_errors, std::vector ids = {}, std::vector exceptions = {}) : oneapi::mkl::batch_error("LAPACK", function, info), oneapi::mkl::lapack::exception(this, num_errors), _ids(ids), _exceptions(exceptions) {} using oneapi::mkl::batch_error::what; - const std::vector &ids() const { + const std::vector& ids() const { return _ids; } - const std::vector &exceptions() const { + const std::vector& exceptions() const { return _exceptions; } @@ -78,7 +78,7 @@ class batch_error : public oneapi::mkl::batch_error, public oneapi::mkl::lapack: class invalid_argument : public oneapi::mkl::invalid_argument, public 
oneapi::mkl::lapack::exception { public: - invalid_argument(const std::string &function, const std::string &info, + invalid_argument(const std::string& function, const std::string& info, std::int64_t arg_position = 0, std::int64_t detail = 0) : oneapi::mkl::invalid_argument("LAPACK", function, info), oneapi::mkl::lapack::exception(this, arg_position, detail) {} diff --git a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp index ace216f00..75ee22211 100644 --- a/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/helper_types.hpp @@ -40,7 +40,7 @@ inline constexpr bool is_int_supported_v = template inline constexpr bool are_fp_int_supported_v = - is_fp_supported_v&& is_int_supported_v; + is_fp_supported_v && is_int_supported_v; } // namespace detail } // namespace sparse diff --git a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx index 4b701eb6f..e25fff46e 100644 --- a/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/onemkl_sparse_blas_backends.hxx @@ -22,196 +22,196 @@ // Dense vector template -ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, +ONEMKL_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val); template -ONEMKL_EXPORT void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, - std::int64_t size, dataType *val); +ONEMKL_EXPORT void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, + std::int64_t size, dataType* val); template -ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, std::int64_t size, 
sycl::buffer val); template -ONEMKL_EXPORT void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, - std::int64_t size, dataType *val); +ONEMKL_EXPORT void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, + std::int64_t size, dataType* val); -ONEMKL_EXPORT sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies = {}); // Dense matrix template -ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, +ONEMKL_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template -ONEMKL_EXPORT void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, +ONEMKL_EXPORT void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - layout dense_layout, dataType *val); + layout dense_layout, dataType* val); template -ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template -ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, +ONEMKL_EXPORT void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t ld, layout dense_layout, dataType *val); + std::int64_t ld, layout dense_layout, dataType* val); -ONEMKL_EXPORT sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector 
&dependencies = {}); +ONEMKL_EXPORT sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& dependencies = {}); // COO matrix template -ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, +ONEMKL_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template -ONEMKL_EXPORT void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, +ONEMKL_EXPORT void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, indexType *row_ind, indexType *col_ind, - dataType *val); + index_base index, indexType* row_ind, indexType* col_ind, + dataType* val); template -ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template -ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, +ONEMKL_EXPORT void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val); + std::int64_t nnz, index_base index, indexType* row_ind, + indexType* col_ind, dataType* val); // CSR matrix template -ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, +ONEMKL_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val); template 
-ONEMKL_EXPORT void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, +ONEMKL_EXPORT void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - index_base index, indexType *row_ptr, indexType *col_ind, - dataType *val); + index_base index, indexType* row_ptr, indexType* col_ind, + dataType* val); template -ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val); template -ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, +ONEMKL_EXPORT void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val); + std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val); // Common sparse matrix functions -ONEMKL_EXPORT sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies = {}); -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property); // SPMM -ONEMKL_EXPORT void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); +ONEMKL_EXPORT void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr); -ONEMKL_EXPORT sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event 
release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr, + const std::vector& dependencies = {}); -ONEMKL_EXPORT void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, +ONEMKL_EXPORT void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, std::size_t &temp_buffer_size); + spmm_descr_t spmm_descr, std::size_t& temp_buffer_size); -ONEMKL_EXPORT void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, +ONEMKL_EXPORT void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace); -ONEMKL_EXPORT sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, +ONEMKL_EXPORT sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}); + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, 
matrix_view A_view, +ONEMKL_EXPORT sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SPMV -ONEMKL_EXPORT void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); +ONEMKL_EXPORT void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr); -ONEMKL_EXPORT sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies = {}); -ONEMKL_EXPORT void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, +ONEMKL_EXPORT void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size); -ONEMKL_EXPORT void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +ONEMKL_EXPORT void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace); -ONEMKL_EXPORT sycl::event spmv_optimize(sycl::queue &queue, 
oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, +ONEMKL_EXPORT sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, - spmv_alg alg, spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies = {}); + const void* beta, dense_vector_handle_t y_handle, + spmv_alg alg, spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +ONEMKL_EXPORT sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SPSV -ONEMKL_EXPORT void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); +ONEMKL_EXPORT void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr); -ONEMKL_EXPORT sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); +ONEMKL_EXPORT sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies = {}); -ONEMKL_EXPORT void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, +ONEMKL_EXPORT void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); -ONEMKL_EXPORT void 
spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +ONEMKL_EXPORT void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace); -ONEMKL_EXPORT sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, +ONEMKL_EXPORT sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, - spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}); + spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies = {}); -ONEMKL_EXPORT sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +ONEMKL_EXPORT sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx index ca09d09d4..aacc32ce3 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_ct.hxx @@ -27,14 +27,14 @@ // Dense vector template std::enable_if_t> init_dense_vector( - backend_selector selector, dense_vector_handle_t *p_dvhandle, + backend_selector selector, dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val) { BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); } template std::enable_if_t> init_dense_vector( - backend_selector 
selector, dense_vector_handle_t *p_dvhandle, - std::int64_t size, dataType *val) { + backend_selector selector, dense_vector_handle_t* p_dvhandle, + std::int64_t size, dataType* val) { BACKEND::init_dense_vector(selector.get_queue(), p_dvhandle, size, val); } @@ -47,20 +47,20 @@ std::enable_if_t> set_dense_vector_data( template std::enable_if_t> set_dense_vector_data( backend_selector selector, dense_vector_handle_t dvhandle, std::int64_t size, - dataType *val) { + dataType* val) { BACKEND::set_dense_vector_data(selector.get_queue(), dvhandle, size, val); } inline sycl::event release_dense_vector(backend_selector selector, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_dense_vector(selector.get_queue(), dvhandle, dependencies); } // Dense matrix template std::enable_if_t> init_dense_matrix( - backend_selector selector, dense_matrix_handle_t *p_dmhandle, + backend_selector selector, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val) { BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, @@ -68,9 +68,9 @@ std::enable_if_t> init_dense_matrix( } template std::enable_if_t> init_dense_matrix( - backend_selector selector, dense_matrix_handle_t *p_dmhandle, + backend_selector selector, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, - dataType *val) { + dataType* val) { BACKEND::init_dense_matrix(selector.get_queue(), p_dmhandle, num_rows, num_cols, ld, dense_layout, val); } @@ -87,21 +87,21 @@ template std::enable_if_t> set_dense_matrix_data( backend_selector selector, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, - dataType *val) { + dataType* val) { BACKEND::set_dense_matrix_data(selector.get_queue(), dmhandle, num_rows, 
num_cols, ld, dense_layout, val); } inline sycl::event release_dense_matrix(backend_selector selector, dense_matrix_handle_t dmhandle, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_dense_matrix(selector.get_queue(), dmhandle, dependencies); } // COO matrix template std::enable_if_t> init_coo_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, @@ -109,9 +109,9 @@ std::enable_if_t> init_coo_m } template std::enable_if_t> init_coo_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val) { + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ind, + indexType* col_ind, dataType* val) { BACKEND::init_coo_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); } @@ -127,8 +127,8 @@ std::enable_if_t> set_coo_ma template std::enable_if_t> set_coo_matrix_data( backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ind, - indexType *col_ind, dataType *val) { + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ind, + indexType* col_ind, dataType* val) { BACKEND::set_coo_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); } @@ -136,7 +136,7 @@ std::enable_if_t> set_coo_ma // CSR matrix template std::enable_if_t> init_csr_matrix( - 
backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, @@ -144,9 +144,9 @@ std::enable_if_t> init_csr_m } template std::enable_if_t> init_csr_matrix( - backend_selector selector, matrix_handle_t *p_smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val) { + backend_selector selector, matrix_handle_t* p_smhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val) { BACKEND::init_csr_matrix(selector.get_queue(), p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); } @@ -162,8 +162,8 @@ std::enable_if_t> set_csr_ma template std::enable_if_t> set_csr_matrix_data( backend_selector selector, matrix_handle_t smhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t nnz, index_base index, indexType *row_ptr, - indexType *col_ind, dataType *val) { + std::int64_t num_cols, std::int64_t nnz, index_base index, indexType* row_ptr, + indexType* col_ind, dataType* val) { BACKEND::set_csr_matrix_data(selector.get_queue(), smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); } @@ -171,7 +171,7 @@ std::enable_if_t> set_csr_ma // Common sparse matrix functions inline sycl::event release_sparse_matrix(backend_selector selector, matrix_handle_t smhandle, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_sparse_matrix(selector.get_queue(), smhandle, dependencies); } @@ -182,30 +182,30 @@ inline bool set_matrix_property(backend_selector selector, // SPMM inline void 
init_spmm_descr(backend_selector selector, - spmm_descr_t *p_spmm_descr) { + spmm_descr_t* p_spmm_descr) { BACKEND::init_spmm_descr(selector.get_queue(), p_spmm_descr); } inline sycl::event release_spmm_descr(backend_selector selector, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spmm_descr(selector.get_queue(), spmm_descr, dependencies); } inline void spmm_buffer_size(backend_selector selector, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { BACKEND::spmm_buffer_size(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, temp_buffer_size); } inline void spmm_optimize(backend_selector selector, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace); @@ -213,48 +213,48 @@ inline void spmm_optimize(backend_selector selector, oneapi::m inline sycl::event spmm_optimize(backend_selector selector, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, + const void* 
alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}) { + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}) { return BACKEND::spmm_optimize(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace, dependencies); } inline sycl::event spmm(backend_selector selector, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, - matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, + matrix_handle_t A_handle, dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::spmm(selector.get_queue(), opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, dependencies); } // SPMV inline void init_spmv_descr(backend_selector selector, - spmv_descr_t *p_spmv_descr) { + spmv_descr_t* p_spmv_descr) { BACKEND::init_spmv_descr(selector.get_queue(), p_spmv_descr); } inline sycl::event release_spmv_descr(backend_selector selector, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spmv_descr(selector.get_queue(), spmv_descr, dependencies); } inline void spmv_buffer_size(backend_selector selector, - oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + const void* beta, 
dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) { BACKEND::spmv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, temp_buffer_size); } inline void spmv_optimize(backend_selector selector, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace) { BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, @@ -262,47 +262,47 @@ inline void spmv_optimize(backend_selector selector, oneapi::m } inline sycl::event spmv_optimize(backend_selector selector, - oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies = {}) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies = {}) { return BACKEND::spmv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace, dependencies); } inline sycl::event spmv(backend_selector selector, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}) { + const 
std::vector& dependencies = {}) { return BACKEND::spmv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); } // SPSV inline void init_spsv_descr(backend_selector selector, - spsv_descr_t *p_spsv_descr) { + spsv_descr_t* p_spsv_descr) { BACKEND::init_spsv_descr(selector.get_queue(), p_spsv_descr); } inline sycl::event release_spsv_descr(backend_selector selector, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::release_spsv_descr(selector.get_queue(), spsv_descr, dependencies); } inline void spsv_buffer_size(backend_selector selector, - oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { BACKEND::spsv_buffer_size(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, temp_buffer_size); } inline void spsv_optimize(backend_selector selector, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace) { @@ -311,20 +311,20 @@ inline void spsv_optimize(backend_selector selector, oneapi::m } inline sycl::event spsv_optimize(backend_selector selector, - oneapi::mkl::transpose opA, const void *alpha, matrix_view A_view, + oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, - spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}) { + spsv_descr_t spsv_descr, void* 
workspace, + const std::vector& dependencies = {}) { return BACKEND::spsv_optimize(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace, dependencies); } inline sycl::event spsv(backend_selector selector, oneapi::mkl::transpose opA, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}) { + const std::vector& dependencies = {}) { return BACKEND::spsv(selector.get_queue(), opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies); } diff --git a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp index 86a00f507..e99613ba3 100644 --- a/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp +++ b/include/oneapi/mkl/sparse_blas/detail/sparse_blas_rt.hpp @@ -30,176 +30,176 @@ namespace sparse { // Dense vector template std::enable_if_t> init_dense_vector( - sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, + sycl::queue& queue, dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val); template std::enable_if_t> init_dense_vector( - sycl::queue &queue, dense_vector_handle_t *p_dvhandle, std::int64_t size, dataType *val); + sycl::queue& queue, dense_vector_handle_t* p_dvhandle, std::int64_t size, dataType* val); template std::enable_if_t> set_dense_vector_data( - sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, + sycl::queue& queue, dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val); template std::enable_if_t> set_dense_vector_data( - sycl::queue &queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType *val); + sycl::queue& queue, dense_vector_handle_t dvhandle, std::int64_t size, dataType* val); -sycl::event 
release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhandle, - const std::vector &dependencies = {}); +sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies = {}); // Dense matrix template std::enable_if_t> init_dense_matrix( - sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, + sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template std::enable_if_t> init_dense_matrix( - sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType* val); template std::enable_if_t> set_dense_matrix_data( - sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, layout dense_layout, sycl::buffer val); template std::enable_if_t> set_dense_matrix_data( - sycl::queue &queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, - std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType *val); + sycl::queue& queue, dense_matrix_handle_t dmhandle, std::int64_t num_rows, + std::int64_t num_cols, std::int64_t ld, layout dense_layout, dataType* val); -sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector &dependencies = {}); +sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& dependencies = {}); // COO matrix template std::enable_if_t> init_coo_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, 
matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> init_coo_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ind, indexType* col_ind, dataType* val); template std::enable_if_t> set_coo_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> set_coo_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ind, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ind, indexType* col_ind, dataType* val); // CSR matrix template std::enable_if_t> init_csr_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> init_csr_matrix( - sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t* p_smhandle, 
std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ptr, indexType* col_ind, dataType* val); template std::enable_if_t> set_csr_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val); template std::enable_if_t> set_csr_matrix_data( - sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, - std::int64_t nnz, index_base index, indexType *row_ptr, indexType *col_ind, dataType *val); + sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, + std::int64_t nnz, index_base index, indexType* row_ptr, indexType* col_ind, dataType* val); // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, - const std::vector &dependencies = {}); +sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies = {}); -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property); +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property); // SPMM -void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr); +void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr); -sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); +sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr, + const std::vector& dependencies = {}); -void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, +void 
spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); -void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace); -sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies = {}); + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies = {}); -sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, 
const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); // SPMV -void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr); +void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr); -sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies = {}); +sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies = {}); -void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size); + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size); -void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace); -sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - void *workspace, const std::vector &dependencies = {}); + void* workspace, const 
std::vector& dependencies = {}); -sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, const std::vector &dependencies = {}); + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector& dependencies = {}); // SPSV -void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr); +void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr); -sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); +sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies = {}); -void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); -void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace); -sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, 
spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies = {}); + spsv_alg alg, spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies = {}); -sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); } // namespace sparse } // namespace mkl diff --git a/src/blas/backends/cublas/cublas_batch.cpp b/src/blas/backends/cublas/cublas_batch.cpp index 2975e6c58..4a88ea56f 100644 --- a/src/blas/backends/cublas/cublas_batch.cpp +++ b/src/blas/backends/cublas/cublas_batch.cpp @@ -29,122 +29,122 @@ namespace column_major { // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw 
unimplemented("blas", "copy_batch", "for column_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + 
sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void 
gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, 
int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + Ts beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { using cuTypeA = typename CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; @@ -153,7 +153,7 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran overflow_check(m, n, k, lda, ldb, ldc, stride_a, stride_b, stride_c, batch_size); cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::mkl::unimplemented( "blas", "sycl::half", "half is not 
supported by the device or the sycl compiler"); @@ -161,19 +161,19 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T( - "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, - get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, &alpha, a_, - get_cublas_datatype(), lda, stride_a, b_, get_cublas_datatype(), - ldb, stride_b, &beta, c_, get_cublas_datatype(), ldc, stride_c, batch_size, - get_cublas_datatype(), cublas_gemm_algo); + CUBLAS_ERROR_FUNC_T("cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, + handle, get_cublas_operation(transa), get_cublas_operation(transb), + m, n, k, &alpha, a_, get_cublas_datatype(), lda, stride_a, + b_, get_cublas_datatype(), ldb, stride_b, &beta, c_, + get_cublas_datatype(), ldc, stride_c, batch_size, + get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, @@ -187,10 +187,10 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, 
int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, \ lda, stride_a, b, ldb, stride_b, beta, c, \ @@ -209,10 +209,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::complex, std::complex, std::com #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -225,307 +225,308 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", 
"trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose 
trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, 
int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, 
int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + 
sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t 
ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", 
"copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for column_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", 
"copy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + 
const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* 
x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double beta, double *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, 
int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for column_major layout"); } template -inline sycl::event gemv_batch(const char *func_name, Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(const char* func_name, Func func, sycl::queue& queue, + transpose* trans, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, const T** x, int64_t* incx, T* beta, T** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + 
onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); for (int64_t i = 0; i < group_count; i++) { - cublas_native_named_func( - func_name, func, err, handle, get_cublas_operation(trans[i]), - (int)m[i], (int)n[i], - (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], x_ + offset, (int)incx[i], - (cuDataType *)&beta[i], y_ + offset, (int)incy[i], (int)group_size[i]); + cublas_native_named_func(func_name, func, err, handle, + get_cublas_operation(trans[i]), (int)m[i], (int)n[i], + (cuDataType*)&alpha[i], a_ + offset, (int)lda[i], + x_ + offset, (int)incx[i], (cuDataType*)&beta[i], + y_ + offset, (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -533,14 +534,13 @@ inline sycl::event gemv_batch(const char *func_name, Func func, sycl::queue &que return done; } -#define GEMV_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, const TYPE **x, \ - int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ - return gemv_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, \ - x, incx, beta, y, incy, group_count, group_size, dependencies); \ +#define GEMV_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ + sycl::event gemv_batch( \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ + return gemv_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, 
queue, trans, m, n, alpha, a, lda, x, \ + incx, beta, y, incy, group_count, group_size, dependencies); \ } GEMV_BATCH_LAUNCHER_USM(float, cublasSgemvBatched) @@ -550,72 +550,72 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, cublasZgemvBatched) #undef GEMV_BATCH_LAUNCHER_USM -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const 
std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& 
dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for column_major layout"); } template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stride_a, - const Tb *b, int64_t ldb, int64_t stride_b, Ts beta, - Tc *c, int64_t ldc, int64_t stride_c, + Ts alpha, const Ta* a, int64_t lda, int64_t stride_a, + const Tb* b, int64_t ldb, int64_t stride_b, Ts beta, + Tc* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuTypeA = typename 
CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; using cuTypeC = typename CudaEquivalentType::Type; @@ -623,7 +623,7 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra overflow_check(m, n, k, lda, ldb, ldc, stride_a, stride_b, stride_c, batch_size); cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::mkl::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -632,16 +632,16 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T( - "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, - get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, &alpha, a, - get_cublas_datatype(), lda, stride_a, b, get_cublas_datatype(), - ldb, stride_b, &beta, c, get_cublas_datatype(), ldc, stride_c, batch_size, - get_cublas_datatype(), cublas_gemm_algo); + CUBLAS_ERROR_FUNC_T("cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, + handle, get_cublas_operation(transa), get_cublas_operation(transb), + m, n, k, &alpha, a, get_cublas_datatype(), lda, stride_a, + b, get_cublas_datatype(), ldb, stride_b, &beta, c, + get_cublas_datatype(), ldc, stride_c, batch_size, + get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( "cublasGemmStridedBatchedEx", cublasGemmStridedBatchedEx, err, handle, @@ -656,11 +656,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, 
transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, \ stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, \ batch_size, dependencies); \ @@ -678,11 +678,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, std::complex, std: #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + 
dtype_string() + "," + \ @@ -695,11 +695,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using cuTypeA = typename CudaEquivalentType::Type; using cuTypeB = typename CudaEquivalentType::Type; using cuTypeC = typename CudaEquivalentType::Type; @@ -709,7 +709,7 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } cublasGemmAlgo_t cublas_gemm_algo = CUBLAS_GEMM_DEFAULT; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::mkl::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -718,7 +718,7 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; @@ -727,10 +727,10 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr CUBLAS_ERROR_FUNC_T( "cublasGemmBatchedEx", cublasGemmBatchedEx, err, handle, 
get_cublas_operation(transa[i]), get_cublas_operation(transb[i]), (int)m[i], - (int)n[i], (int)k[i], &alpha[i], (const void *const *)(a + offset), - get_cublas_datatype(), (int)lda[i], (const void *const *)(b + offset), + (int)n[i], (int)k[i], &alpha[i], (const void* const*)(a + offset), + get_cublas_datatype(), (int)lda[i], (const void* const*)(b + offset), get_cublas_datatype(), (int)ldb[i], &beta[i], - (void *const *)(c + offset), get_cublas_datatype(), (int)ldc[i], + (void* const*)(c + offset), get_cublas_datatype(), (int)ldc[i], (int)group_size[i], get_cublas_datatype(), cublas_gemm_algo); #else CUBLAS_ERROR_FUNC_T_SYNC( @@ -750,11 +750,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -771,11 +771,11 @@ GEMM_BATCH_LAUNCHER_USM(std::complex, std::complex, std::complex #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t 
*ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -787,63 +787,63 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& 
dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for column_major layout"); } template -inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, T *alpha, const T **a, - int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(const char* func_name, Func func, sycl::queue& queue, + side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* 
group_size, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cublasStatus_t err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); cublas_native_named_func( func_name, func, err, handle, get_cublas_side_mode(left_right[i]), get_cublas_fill_mode(upper_lower[i]), get_cublas_operation(trans[i]), get_cublas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], - (cuDataType *)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], + (cuDataType*)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], (int)group_size[i]); offset += group_size[i]; @@ -854,11 +854,11 @@ inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &que } #define TRSM_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + 
const std::vector& dependencies) { \ return trsm_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, \ dependencies); \ @@ -871,208 +871,208 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, cublasZtrsmBatched) #undef TRSM_BATCH_LAUNCHER_USM -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const 
std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* 
c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, 
int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose 
transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, 
transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, 
transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue 
&queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } @@ -1081,122 +1081,122 @@ namespace row_major { // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t 
stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, 
int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, 
std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - 
sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1214,377 +1214,377 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::complex, std::complex, std::com #undef GEMM_STRIDED_BATCH_LAUNCHER -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, 
int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t 
batch_size) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, 
int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, 
int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, 
sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose 
transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t 
*incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t 
incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - 
int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, 
std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double beta, double *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const 
std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, double *alpha, - const double 
**a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue 
&queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const 
std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const 
std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", "for row_major layout"); } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stride_a, const TYPE_B *b, int64_t ldb, int64_t stride_b, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stride_c, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stride_a, const TYPE_B* b, int64_t ldb, int64_t stride_b, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stride_c, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1602,11 +1602,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, std::complex, std: #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define 
GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", "for row_major layout"); \ } @@ -1623,51 +1623,51 @@ GEMM_BATCH_LAUNCHER_USM(std::complex, std::complex, std::complex #undef GEMM_BATCH_LAUNCHER_USM -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, 
int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } template -inline sycl::event trsm_batch(const char *func_name, Func func, sycl::queue &queue, - side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, T *alpha, const T **a, - int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(const char* func_name, Func func, sycl::queue& queue, + side* left_right, uplo* upper_lower, 
transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, T* alpha, const T** a, + int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", "for row_major layout"); } #define TRSM_BATCH_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, \ dependencies); \ @@ -1680,208 +1680,208 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, cublasZtrsmBatched) #undef TRSM_BATCH_LAUNCHER_USM -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, 
- double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t 
stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, 
const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event 
omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) 
{ throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t 
m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, const float** a, int64_t* lda, float** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event 
omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, const double** a, int64_t* lda, double** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + std::complex** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event 
imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } diff --git a/src/blas/backends/cublas/cublas_extensions.cpp 
b/src/blas/backends/cublas/cublas_extensions.cpp index cc80b483d..c80392aa6 100644 --- a/src/blas/backends/cublas/cublas_extensions.cpp +++ b/src/blas/backends/cublas/cublas_extensions.cpp @@ -29,88 +29,88 @@ namespace column_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, 
sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - 
int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -void omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? m : n); const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? 
n : m); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -122,16 +122,16 @@ OMATCOPY_LAUNCHER(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE 
alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, b, \ ldb, strideb); \ } @@ -142,53 +142,53 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -void omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &b, int64_t ldb, 
sycl::buffer &c, int64_t ldc) { +void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), m, n, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { \ omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, \ b, ldb, c, ldc); \ } @@ -202,95 +202,95 @@ OMATADD_LAUNCHER(std::complex, cublasZgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, 
transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* 
co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", 
"for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -sycl::event omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { 
cgh.depends_on(dependencies); const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? m : n); const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? n : m); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); return done; } #define OMATCOPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, \ ldb, dependencies); \ } @@ -303,16 +303,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", 
""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -323,58 +323,58 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw 
unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -inline sycl::event omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), m, n, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); return 
done; } #define OMATADD_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, \ lda, beta, b, ldb, c, ldc, dependencies); \ } @@ -392,88 +392,88 @@ namespace row_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", 
"gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, 
int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -void omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +void omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? n : m); const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? m : n); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -485,16 +485,16 @@ OMATCOPY_LAUNCHER(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void 
omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, b, \ ldb, strideb); \ } @@ -505,53 +505,53 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", 
"imatcopy", "for row_major layout"); } template -void omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), n, m, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), n, m, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, 
int64_t ldc) { \ omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, \ b, ldb, c, ldc); \ } @@ -565,95 +565,95 @@ OMATADD_LAUNCHER(std::complex, cublasZgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& 
queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose 
transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -sycl::event omatcopy(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& 
dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); const int64_t logical_m = (trans == oneapi::mkl::transpose::nontrans ? n : m); const int64_t logical_n = (trans == oneapi::mkl::transpose::nontrans ? m : n); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(trans), get_cublas_operation(trans), logical_m, logical_n, - (cuDataType *)&alpha, a_, lda, nullptr, nullptr, ldb, b_, ldb); + (cuDataType*)&alpha, a_, lda, nullptr, nullptr, ldb, b_, ldb); }); }); return done; } #define OMATCOPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, \ ldb, dependencies); \ } @@ -666,16 +666,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, cublasZgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, 
transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -686,58 +686,58 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector 
&dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline sycl::event omatadd(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), n, 
m, (cuDataType *)&alpha, a_, - lda, (cuDataType *)&beta, b_, ldb, c_, ldc); + get_cublas_operation(transb), n, m, (cuDataType*)&alpha, a_, + lda, (cuDataType*)&beta, b_, ldb, c_, ldc); }); }); return done; } #define OMATADD_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, \ lda, beta, b, ldb, c, ldc, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_handle.hpp b/src/blas/backends/cublas/cublas_handle.hpp index db9df5584..83a76c927 100644 --- a/src/blas/backends/cublas/cublas_handle.hpp +++ b/src/blas/backends/cublas/cublas_handle.hpp @@ -28,10 +28,10 @@ namespace cublas { template struct cublas_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t cublas_handle_mapper_{}; ~cublas_handle() noexcept(false) { - for (auto &handle_pair : cublas_handle_mapper_) { + for (auto& handle_pair : cublas_handle_mapper_) { cublasStatus_t err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/blas/backends/cublas/cublas_helper.hpp b/src/blas/backends/cublas/cublas_helper.hpp index 0bd4e6274..58df5c006 100644 --- a/src/blas/backends/cublas/cublas_helper.hpp +++ b/src/blas/backends/cublas/cublas_helper.hpp @@ -81,7 +81,7 @@ void overflow_check(Index index, Next... 
indices) { class cublas_error : virtual public std::runtime_error { protected: - inline const char *cublas_error_map(cublasStatus_t error) { + inline const char* cublas_error_map(cublasStatus_t error) { switch (error) { case CUBLAS_STATUS_SUCCESS: return "CUBLAS_STATUS_SUCCESS"; @@ -133,7 +133,7 @@ class cublas_error : virtual public std::runtime_error { class cuda_error : virtual public std::runtime_error { protected: - inline const char *cuda_error_map(CUresult result) { + inline const char* cuda_error_map(CUresult result) { switch (result) { case CUDA_SUCCESS: return "CUDA_SUCCESS"; case CUDA_ERROR_NOT_PERMITTED: return "CUDA_ERROR_NOT_PERMITTED"; @@ -206,23 +206,22 @@ class cuda_error : virtual public std::runtime_error { cuStreamSynchronize(currentStreamId); template -inline void cublas_native_func(Func func, cublasStatus_t err, - cublasHandle_t handle, Types... args) { +inline void cublas_native_func(Func func, cublasStatus_t err, cublasHandle_t handle, + Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC(func, err, handle, args...) + CUBLAS_ERROR_FUNC(func, err, handle, args...) #else - CUBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) + CUBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) #endif }; template -inline void cublas_native_named_func(const char *func_name, Func func, - cublasStatus_t err, cublasHandle_t handle, - Types... args) { +inline void cublas_native_named_func(const char* func_name, Func func, cublasStatus_t err, + cublasHandle_t handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, args...) + CUBLAS_ERROR_FUNC_T(func_name, func, err, handle, args...) #else - CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, args...) + CUBLAS_ERROR_FUNC_T_SYNC(func_name, func, err, handle, args...) 
#endif }; diff --git a/src/blas/backends/cublas/cublas_level1.cpp b/src/blas/backends/cublas/cublas_level1.cpp index 3b0699c87..115712e80 100644 --- a/src/blas/backends/cublas/cublas_level1.cpp +++ b/src/blas/backends/cublas/cublas_level1.cpp @@ -32,16 +32,16 @@ namespace column_major { // Level 1 template -inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -49,8 +49,8 @@ inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // ASUM does not support negative index cublas_native_named_func(func_name, func, err, handle, n, x_, std::abs(incx), res_); @@ -63,8 +63,8 @@ inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define ASUM_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } ASUM_LAUNCHER(float, float, cublasSasum) @@ -74,26 +74,26 @@ ASUM_LAUNCHER(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER template -inline void scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - sycl::buffer &x, int64_t incx) { +inline void scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + sycl::buffer& x, int64_t incx) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; // SCAL does not support negative incx - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1*)&a, x_, std::abs(incx)); }); }); } #define SCAL_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void 
scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx); \ } SCAL_LAUNCHER(float, float, cublasSscal) @@ -105,27 +105,27 @@ SCAL_LAUNCHER(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER template -inline void axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, - incx, y_, incy); + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType*)&alpha, x_, incx, + y_, incy); }); }); } #define AXPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -135,39 +135,39 @@ AXPY_LAUNCHER(std::complex, cublasCaxpy) AXPY_LAUNCHER(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER -void 
axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +inline void rotg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template 
get_access(cgh); auto c_acc = c.template get_access(cgh); auto s_acc = s.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -175,10 +175,10 @@ inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buf // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); - auto s_ = sc.get_mem(s_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); + auto s_ = sc.get_mem(s_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, a_, b_, c_, s_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -190,8 +190,8 @@ inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buf } #define ROTG_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -202,16 +202,16 @@ ROTG_LAUNCHER(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER template -inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer ¶m) { +inline void rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& param) { using cuDataType = typename 
CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -219,9 +219,9 @@ inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto param_ = sc.get_mem(param_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto param_ = sc.get_mem(param_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -233,8 +233,8 @@ inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define ROTM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -243,17 +243,17 @@ ROTM_LAUNCHER(double, cublasDrotm) #undef ROTM_LAUNCHER template -inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void copy(const char* func_name, Func 
func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -261,8 +261,8 @@ inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define COPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -273,16 +273,16 @@ COPY_LAUNCHER(std::complex, cublasZcopy) #undef COPY_LAUNCHER template -inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { +inline void dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, 
[=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -290,9 +290,9 @@ inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -304,8 +304,8 @@ inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, } #define DOT_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } DOT_LAUNCHER(, float, cublasSdot) @@ -317,17 +317,17 @@ DOT_LAUNCHER(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER template -inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, +inline void rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; 
using cuDataType3 = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -335,18 +335,18 @@ inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. // cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + (cuDataType2*)&c, (cuDataType3*)&s); }); }); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -356,15 +356,15 @@ ROT_LAUNCHER(std::complex, float, float, cublasCsrot) ROT_LAUNCHER(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { 
overflow_check(n, incx, incy); // cuBLAS does not support sdot so we need to mimic sdot. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.get_access(cgh); auto y_acc = y.get_access(cgh); auto res_acc = result.get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -372,9 +372,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; cublas_native_func(cublasSdot, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -388,23 +388,23 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, result.get_host_access(sycl::read_write)[0] += sb; } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, T y1, sycl::buffer& param) { using 
cuDataType = typename CudaEquivalentType::Type; sycl::buffer y1_buff(&y1, sycl::range<1>(1)); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto d1_acc = d1.template get_access(cgh); auto d2_acc = d2.template get_access(cgh); auto x1_acc = x1.template get_access(cgh); auto y1_acc = y1_buff.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -412,11 +412,11 @@ inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::bu // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto d1_ = sc.get_mem(d1_acc); - auto d2_ = sc.get_mem(d2_acc); - auto x1_ = sc.get_mem(x1_acc); - auto y1_ = sc.get_mem(y1_acc); - auto param_ = sc.get_mem(param_acc); + auto d1_ = sc.get_mem(d1_acc); + auto d2_ = sc.get_mem(d2_acc); + auto x1_ = sc.get_mem(x1_acc); + auto y1_ = sc.get_mem(y1_acc); + auto param_ = sc.get_mem(param_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, d1_, d2_, x1_, y1_, param_); // Higher level BLAS functions expect CUBLAS_POINTER_MODE_HOST @@ -428,8 +428,8 @@ inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::bu } #define ROTMG_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -438,8 +438,8 @@ 
ROTMG_LAUNCHER(double, cublasDrotmg) #undef ROTMG_LAUNCHER template -inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -450,10 +450,10 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -461,8 +461,8 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); cublasStatus_t err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference netlib BLAS. 
@@ -474,7 +474,7 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -483,8 +483,8 @@ inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t } #define IAMAX_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMAX_LAUNCHER(float, cublasIsamax) @@ -494,17 +494,17 @@ IAMAX_LAUNCHER(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER template -inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -512,8 +512,8 @@ inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define SWAP_LAUNCHER(TYPE, CUBLAS_ROUTINE) 
\ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -524,8 +524,8 @@ SWAP_LAUNCHER(std::complex, cublasZswap) #undef SWAP_LAUNCHER template -inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -536,10 +536,10 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -547,8 +547,8 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); cublasStatus_t err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented as a reference IAMIN. @@ -560,7 +560,7 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -569,8 +569,8 @@ inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t } #define IAMIN_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMIN_LAUNCHER(float, cublasIsamin) @@ -580,16 +580,16 @@ IAMIN_LAUNCHER(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER template -inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); // By default the pointer mode is the CUBLAS_POINTER_MODE_HOST // when the data is on buffer, it must be set to @@ -597,8 +597,8 @@ inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); cublasStatus_t err; // NRM2 does not support negative index cublas_native_named_func(func_name, func, err, handle, n, x_, std::abs(incx), res_); @@ -611,8 +611,8 @@ inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n } #define NRM2_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } NRM2_LAUNCHER(float, float, cublasSnrm2) @@ -625,24 +625,24 @@ NRM2_LAUNCHER(std::complex, double, cublasDznrm2) // Level 1 template -inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -658,8 +658,8 @@ inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, in } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } ASUM_LAUNCHER_USM(float, float, cublasSasum) @@ -669,22 +669,22 @@ ASUM_LAUNCHER_USM(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - T2 *x, int64_t incx, const std::vector &dependencies) { +inline sycl::event scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + T2* x, int64_t incx, const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, 
[=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); cublasStatus_t err; // SCAL does not support negative incx - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1 *)&a, x_, + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType1*)&a, x_, std::abs(incx)); }); }); @@ -692,8 +692,8 @@ inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, in } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } SCAL_LAUNCHER_USM(float, float, cublasSscal) @@ -705,31 +705,31 @@ SCAL_LAUNCHER_USM(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - const T *x, int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + const T* x, int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = 
reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, n, (cuDataType *)&alpha, x_, - incx, y_, incy); + cublas_native_named_func(func_name, func, err, handle, n, (cuDataType*)&alpha, x_, incx, + y_, incy); }); }); return done; } #define AXPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, \ dependencies); \ } @@ -740,32 +740,32 @@ AXPY_LAUNCHER_USM(std::complex, cublasCaxpy) AXPY_LAUNCHER_USM(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const 
std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, - T1 *s, const std::vector &dependencies) { +inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1* a, T1* b, T2* c, + T1* s, const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; auto ctx = queue.get_context(); @@ -783,17 +783,17 @@ inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 "If any pointer is only device accessible, all must be device accessible"); } } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); - auto s_ = reinterpret_cast(s); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); if (results_on_device) { 
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -808,8 +808,8 @@ inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -820,21 +820,21 @@ ROTG_LAUNCHER_USM(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, T *param, - const std::vector &dependencies) { +inline sycl::event rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, T* param, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto param_ = reinterpret_cast(param); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto param_ = reinterpret_cast(param); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, param_); }); @@ -843,8 +843,8 @@ inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, in } #define ROTM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - 
sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param, \ dependencies); \ } @@ -854,20 +854,20 @@ ROTM_LAUNCHER_USM(double, cublasDrotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event copy(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -876,8 +876,8 @@ inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, in } #define COPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& 
dependencies) { \ return copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -888,23 +888,23 @@ COPY_LAUNCHER_USM(std::complex, cublasZcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - const int64_t incx, const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + const int64_t incx, const T* y, int64_t incy, T* result, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -919,9 +919,9 @@ inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int } #define DOT_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const 
std::vector& dependencies) { \ return dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result, \ dependencies); \ } @@ -934,34 +934,34 @@ DOT_LAUNCHER_USM(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER_USM template -inline sycl::event rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 *x, - const int64_t incx, T1 *y, int64_t incy, T2 c, T3 s, - const std::vector &dependencies) { +inline sycl::event rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1* x, + const int64_t incx, T1* y, int64_t incy, T2 c, T3 s, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; using cuDataType3 = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy, - (cuDataType2 *)&c, (cuDataType3 *)&s); + (cuDataType2*)&c, (cuDataType3*)&s); }); }); return done; } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, \ 
dependencies); \ } @@ -972,23 +972,23 @@ ROT_LAUNCHER_USM(std::complex, float, float, cublasCsrot) ROT_LAUNCHER_USM(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { overflow_check(n, incx, incy); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; // cuBLAS does not support sdsdot so we need to mimic sdot. - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1017,14 +1017,14 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 } } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline sycl::event rotmg(const char *func_name, Func 
func, sycl::queue &queue, T *d1, T *d2, T *x1, - T y1, T *param, const std::vector &dependencies) { +inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T* d1, T* d2, T* x1, + T y1, T* param, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; auto ctx = queue.get_context(); bool results_on_device = (sycl::get_pointer_type(d1, ctx) == sycl::usm::alloc::device || @@ -1039,22 +1039,22 @@ inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T "If any pointer is only device accessible, all must be device accessible"); } } - cuDataType *y1_; + cuDataType* y1_; if (results_on_device) { y1_ = sycl::malloc_device(1, queue); queue.memcpy(y1_, &y1, sizeof(cuDataType)).wait(); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto d1_ = reinterpret_cast(d1); - auto d2_ = reinterpret_cast(d2); - auto x1_ = reinterpret_cast(x1); - auto param_ = reinterpret_cast(param); + auto d1_ = reinterpret_cast(d1); + auto d2_ = reinterpret_cast(d2); + auto x1_ = reinterpret_cast(x1); + auto param_ = reinterpret_cast(param); cublasStatus_t err; if (results_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); @@ -1062,7 +1062,7 @@ inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST); } else { - auto y1_c = reinterpret_cast(&y1); + auto y1_c = reinterpret_cast(&y1); cublas_native_named_func(func_name, func, err, handle, d1_, d2_, x1_, y1_c, param_); } }); @@ -1076,8 +1076,8 @@ inline sycl::event rotmg(const char *func_name, Func func, 
sycl::queue &queue, T } #define ROTMG_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1086,9 +1086,9 @@ ROTMG_LAUNCHER_USM(double, cublasDrotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -1097,7 +1097,7 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i // This change may cause failure as the result of integer overflow // based on the size. 
int int_res = 0; - int *int_res_p = nullptr; + int* int_res_p = nullptr; bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; if (result_on_device) { @@ -1106,14 +1106,14 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i else { int_res_p = &int_res; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1128,7 +1128,7 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i }); done.wait(); if (result_on_device) { - auto last_ev = queue.submit([&](sycl::handler &cgh) { + auto last_ev = queue.submit([&](sycl::handler& cgh) { cgh.single_task([=]() { *result = std::max((int64_t)*int_res_p - 1, (int64_t)0); }); }); last_ev.wait(); @@ -1142,8 +1142,8 @@ inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, i } #define IAMAX_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMAX_LAUNCHER_USM(float, cublasIsamax) @@ -1153,20 +1153,20 @@ IAMAX_LAUNCHER_USM(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(const char *func_name, Func 
func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, n, x_, incx, y_, incy); }); @@ -1175,8 +1175,8 @@ inline sycl::event swap(const char *func_name, Func func, sycl::queue &queue, in } #define SWAP_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1187,9 +1187,9 @@ SWAP_LAUNCHER_USM(std::complex, cublasZswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { 
using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); // cuBLAS does not support int64_t as return type for the data. So we need to @@ -1198,7 +1198,7 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i // This change may cause failure as the result of integer overflow // based on the size. int int_res = 0; - int *int_res_p = nullptr; + int* int_res_p = nullptr; bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; if (result_on_device) { @@ -1207,14 +1207,14 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i else { int_res_p = &int_res; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1229,7 +1229,7 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i }); done.wait(); if (result_on_device) { - auto last_ev = queue.submit([&](sycl::handler &cgh) { + auto last_ev = queue.submit([&](sycl::handler& cgh) { cgh.single_task([=]() { *result = std::max((int64_t)*int_res_p - 1, (int64_t)0); }); }); last_ev.wait(); @@ -1243,8 +1243,8 @@ inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, i } #define IAMIN_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ 
+ int64_t* result, const std::vector& dependencies) { \ return iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMIN_LAUNCHER_USM(float, cublasIsamin) @@ -1254,24 +1254,24 @@ IAMIN_LAUNCHER_USM(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { using cuDataType1 = typename CudaEquivalentType::Type; using cuDataType2 = typename CudaEquivalentType::Type; overflow_check(n, incx); bool result_on_device = sycl::get_pointer_type(result, queue.get_context()) == sycl::usm::alloc::device; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); if (result_on_device) { cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_DEVICE); } @@ -1287,8 +1287,8 @@ inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, in } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return 
nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } NRM2_LAUNCHER_USM(float, float, cublasSnrm2) @@ -1304,14 +1304,14 @@ namespace row_major { // Level 1 template -inline void asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "asum", "for row_major layout"); } #define ASUM_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } ASUM_LAUNCHER(float, float, cublasSasum) @@ -1321,13 +1321,13 @@ ASUM_LAUNCHER(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER template -inline void scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - sycl::buffer &x, int64_t incx) { +inline void scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "scal", "for row_major layout"); } #define SCAL_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx); \ } SCAL_LAUNCHER(float, float, cublasSscal) @@ -1339,14 +1339,14 @@ SCAL_LAUNCHER(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER template -inline void axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(const char* func_name, Func func, 
sycl::queue& queue, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpy", "for row_major layout"); } #define AXPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -1356,37 +1356,37 @@ AXPY_LAUNCHER(std::complex, cublasCaxpy) AXPY_LAUNCHER(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + 
sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline void rotg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +inline void rotg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { throw unimplemented("blas", "rotg", "for row_major layout"); } #define ROTG_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -1397,15 +1397,15 @@ ROTG_LAUNCHER(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER template -inline void rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer ¶m) { +inline void rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& param) { throw unimplemented("blas", "rotm", "for row_major layout"); } #define ROTM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -1414,14 +1414,14 @@ ROTM_LAUNCHER(double, cublasDrotm) #undef ROTM_LAUNCHER template -inline void copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void copy(const char* func_name, Func func, sycl::queue& queue, 
int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "copy", "for row_major layout"); } #define COPY_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1432,15 +1432,15 @@ COPY_LAUNCHER(std::complex, cublasZcopy) #undef COPY_LAUNCHER template -inline void dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &result) { +inline void dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } #define DOT_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } DOT_LAUNCHER(, float, cublasSdot) @@ -1452,15 +1452,15 @@ DOT_LAUNCHER(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER template -inline void rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &y, int64_t incy, +inline void rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { throw unimplemented("blas", "rot", "for row_major layout"); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - 
void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -1470,25 +1470,25 @@ ROT_LAUNCHER(std::complex, float, float, cublasCsrot) ROT_LAUNCHER(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "sdsdot", "for row_major layout"); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline void rotmg(const char *func_name, Func func, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(const char* func_name, Func func, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, T y1, sycl::buffer& param) { throw unimplemented("blas", "rotmg", "for row_major layout"); } #define ROTMG_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -1497,14 +1497,14 @@ ROTMG_LAUNCHER(double, cublasDrotmg) #undef 
ROTMG_LAUNCHER template -inline void iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamax", "for row_major layout"); } #define IAMAX_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMAX_LAUNCHER(float, cublasIsamax) @@ -1514,14 +1514,14 @@ IAMAX_LAUNCHER(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER template -inline void swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy) { +inline void swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "swap", "for row_major layout"); } #define SWAP_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1532,14 +1532,14 @@ SWAP_LAUNCHER(std::complex, cublasZswap) #undef SWAP_LAUNCHER template -inline void iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamin", "for 
row_major layout"); } #define IAMIN_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } IAMIN_LAUNCHER(float, cublasIsamin) @@ -1549,14 +1549,14 @@ IAMIN_LAUNCHER(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER template -inline void nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - sycl::buffer &x, const int64_t incx, sycl::buffer &result) { +inline void nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + sycl::buffer& x, const int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "nrm2", "for row_major layout"); } #define NRM2_LAUNCHER(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result); \ } NRM2_LAUNCHER(float, float, cublasSnrm2) @@ -1569,15 +1569,15 @@ NRM2_LAUNCHER(std::complex, double, cublasDznrm2) // Level 1 template -inline sycl::event asum(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event asum(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { throw unimplemented("blas", "asum", "for row_major layout"); } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const 
TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } ASUM_LAUNCHER_USM(float, float, cublasSasum) @@ -1587,14 +1587,14 @@ ASUM_LAUNCHER_USM(std::complex, double, cublasDzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 a, - T2 *x, int64_t incx, const std::vector &dependencies) { +inline sycl::event scal(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1 a, + T2* x, int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "scal", "for row_major layout"); } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } SCAL_LAUNCHER_USM(float, float, cublasSscal) @@ -1606,15 +1606,15 @@ SCAL_LAUNCHER_USM(double, std::complex, cublasZdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(const char *func_name, Func func, sycl::queue &queue, int64_t n, T alpha, - const T *x, int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event axpy(const char* func_name, Func func, sycl::queue& queue, int64_t n, T alpha, + const T* x, int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpy", "for row_major layout"); } #define AXPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, 
int64_t incy, const std::vector& dependencies) { \ return axpy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, \ dependencies); \ } @@ -1625,38 +1625,38 @@ AXPY_LAUNCHER_USM(std::complex, cublasCaxpy) AXPY_LAUNCHER_USM(std::complex, cublasZaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for 
row_major layout"); } template -inline sycl::event rotg(const char *func_name, Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, - T1 *s, const std::vector &dependencies) { +inline sycl::event rotg(const char* func_name, Func func, sycl::queue& queue, T1* a, T1* b, T2* c, + T1* s, const std::vector& dependencies) { throw unimplemented("blas", "rotg", "for row_major layout"); } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, CUBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -1667,15 +1667,15 @@ ROTG_LAUNCHER_USM(std::complex, double, cublasZrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, T *param, - const std::vector &dependencies) { +inline sycl::event rotm(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, T* param, + const std::vector& dependencies) { throw unimplemented("blas", "rotm", "for row_major layout"); } #define ROTM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, param, \ dependencies); \ } @@ -1685,15 +1685,15 @@ ROTM_LAUNCHER_USM(double, cublasDrotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - int64_t incx, T *y, int64_t incy, - const 
std::vector &dependencies) { +inline sycl::event copy(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "copy", "for row_major layout"); } #define COPY_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1704,16 +1704,16 @@ COPY_LAUNCHER_USM(std::complex, cublasZcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(const char *func_name, Func func, sycl::queue &queue, int64_t n, const T *x, - const int64_t incx, const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(const char* func_name, Func func, sycl::queue& queue, int64_t n, const T* x, + const int64_t incx, const T* y, int64_t incy, T* result, + const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } #define DOT_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, result, \ dependencies); \ } @@ -1726,16 +1726,16 @@ DOT_LAUNCHER_USM(u, std::complex, cublasZdotu) #undef DOT_LAUNCHER_USM template -inline sycl::event rot(const char *func_name, Func func, sycl::queue &queue, int64_t n, T1 *x, - const int64_t incx, T1 
*y, int64_t incy, T2 c, T3 s, - const std::vector &dependencies) { +inline sycl::event rot(const char* func_name, Func func, sycl::queue& queue, int64_t n, T1* x, + const int64_t incx, T1* y, int64_t incy, T2 c, T3 s, + const std::vector& dependencies) { throw unimplemented("blas", "rot", "for row_major layout"); } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, CUBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, \ dependencies); \ } @@ -1746,26 +1746,26 @@ ROT_LAUNCHER_USM(std::complex, float, float, cublasCsrot) ROT_LAUNCHER_USM(std::complex, double, double, cublasZdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { throw unimplemented("blas", "sdsdot", "for row_major layout"); } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline sycl::event rotmg(const char *func_name, Func func, sycl::queue &queue, T *d1, T *d2, T *x1, - T y1, T *param, const std::vector &dependencies) { +inline sycl::event rotmg(const char* func_name, Func func, sycl::queue& queue, T* d1, T* d2, T* 
x1, + T y1, T* param, const std::vector& dependencies) { throw unimplemented("blas", "rotmg", "for row_major layout"); } #define ROTMG_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1774,15 +1774,15 @@ ROTMG_LAUNCHER_USM(double, cublasDrotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamax(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamax", "for row_major layout"); } #define IAMAX_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMAX_LAUNCHER_USM(float, cublasIsamax) @@ -1792,15 +1792,15 @@ IAMAX_LAUNCHER_USM(std::complex, cublasIzamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(const char *func_name, Func func, sycl::queue &queue, int64_t n, T *x, - int64_t incx, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event swap(const char* func_name, Func func, sycl::queue& queue, int64_t n, T* x, + int64_t incx, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", 
"swap", "for row_major layout"); } #define SWAP_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1811,15 +1811,15 @@ SWAP_LAUNCHER_USM(std::complex, cublasZswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T *x, const int64_t incx, int64_t *result, - const std::vector &dependencies) { +inline sycl::event iamin(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T* x, const int64_t incx, int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamin", "for row_major layout"); } #define IAMIN_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } IAMIN_LAUNCHER_USM(float, cublasIsamin) @@ -1829,15 +1829,15 @@ IAMIN_LAUNCHER_USM(std::complex, cublasIzamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(const char *func_name, Func func, sycl::queue &queue, int64_t n, - const T1 *x, const int64_t incx, T2 *result, - const std::vector &dependencies) { +inline sycl::event nrm2(const char* func_name, Func func, sycl::queue& queue, int64_t n, + const T1* x, const int64_t incx, T2* result, + const std::vector& dependencies) { throw unimplemented("blas", "nrm2", "for row_major layout"); } #define NRM2_LAUNCHER_USM(TYPE1, 
TYPE2, CUBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } NRM2_LAUNCHER_USM(float, float, cublasSnrm2) diff --git a/src/blas/backends/cublas/cublas_level2.cpp b/src/blas/backends/cublas/cublas_level2.cpp index 5ce6e5eaf..904f8d7e6 100644 --- a/src/blas/backends/cublas/cublas_level2.cpp +++ b/src/blas/backends/cublas/cublas_level2.cpp @@ -31,32 +31,32 @@ namespace column_major { // Buffer APIs template -inline void gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, (cuDataType 
*)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, (cuDataType*)&alpha, a_, lda, x_, incx, (cuDataType*)&beta, + y_, incy); }); }); } #define GEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy); \ } @@ -68,32 +68,32 @@ GEMV_LAUNCHER(std::complex, cublasZgemv) #undef GEMV_LAUNCHER template -inline void gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, kl, ku, (cuDataType*)&alpha, a_, lda, x_, incx, + (cuDataType*)&beta, y_, incy); }); }); } #define GBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -105,30 +105,30 @@ GBMV_LAUNCHER(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER template -inline void ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = 
sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define GER_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -142,32 +142,32 @@ GER_LAUNCHER(c, std::complex, cublasZgerc) #undef GER_LAUNCHER template -inline void hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -177,32 +177,32 @@ HBMV_LAUNCHER(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER template -inline void hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = 
sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -212,31 +212,31 @@ HEMV_LAUNCHER(std::complex, cublasZhemv) #undef HEMV_LAUNCHER template -inline void her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a, int64_t lda) { +inline void her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a, int64_t lda) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; 
cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_, lda); }); }); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -247,32 +247,32 @@ HER_LAUNCHER(double, std::complex, cublasZher) #undef HER_LAUNCHER template -inline void her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + 
get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define HER2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -283,32 +283,32 @@ HER2_LAUNCHER(std::complex, cublasZher2) #undef HER2_LAUNCHER template -inline void hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, 
(cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define HPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -319,30 +319,30 @@ HPMV_LAUNCHER(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER template -inline void hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a) { +inline void hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_); }); }); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - 
sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -352,32 +352,32 @@ HPR_LAUNCHER(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER template -inline void hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define HPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, 
int64_t incy, \ + sycl::buffer& a) { \ hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -387,32 +387,32 @@ HPR2_LAUNCHER(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, 
\ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -423,32 +423,32 @@ SBMV_LAUNCHER(double, cublasDsbmv) #undef SBMV_LAUNCHER template -inline void symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SYMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo 
upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -459,28 +459,28 @@ SYMV_LAUNCHER(double, cublasDsymv) #undef SYMV_LAUNCHER template -inline void syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_, lda); }); }); } #define SYR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -492,32 +492,32 @@ SYR_LAUNCHER(std::complex, cublasZsyr) #undef SYR_LAUNCHER template -inline 
void syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define SYR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -531,32 +531,32 @@ SYR2_LAUNCHER(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER template -inline void spmv(const char *func_name, Func func, sycl::queue &queue, uplo 
upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); } #define SPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -567,28 +567,28 @@ SPMV_LAUNCHER(double, cublasDspmv) #undef SPMV_LAUNCHER template -inline void spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, 
sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_); }); }); } #define SPR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -598,32 +598,32 @@ SPR_LAUNCHER(double, cublasDspr) #undef SPR_LAUNCHER template -inline void spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define SPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -633,18 +633,18 @@ SPR2_LAUNCHER(double, cublasDspr2) #undef SPR2_LAUNCHER template -inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template 
get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -654,8 +654,8 @@ inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -669,18 +669,18 @@ TBMV_LAUNCHER(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER template -inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -690,8 +690,8 @@ inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TBSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -705,18 +705,18 @@ TBSV_LAUNCHER(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER template -inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, 
get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -726,8 +726,8 @@ inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -740,18 +740,18 @@ TPMV_LAUNCHER(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER template -inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -761,8 +761,8 @@ inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TPSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -775,18 +775,18 @@ TPSV_LAUNCHER(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER template -inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -796,8 +796,8 @@ inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TRMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ 
trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -810,18 +810,18 @@ TRMV_LAUNCHER(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER template -inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -831,8 +831,8 @@ inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo uppe } #define TRSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -847,35 +847,35 @@ TRSV_LAUNCHER(std::complex, cublasZtrsv) // USM APIs template -inline sycl::event gemv(const char 
*func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, (cuDataType*)&alpha, a_, lda, x_, incx, (cuDataType*)&beta, + y_, incy); }); }); return done; } #define GEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, 
queue, trans, m, n, alpha, a, lda, x, incx, \ beta, y, incy, dependencies); \ } @@ -887,36 +887,36 @@ GEMV_LAUNCHER_USM(std::complex, cublasZgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T *a, - int64_t lda, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T* a, + int64_t lda, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), m, - n, kl, ku, (cuDataType *)&alpha, a_, lda, x_, incx, - (cuDataType *)&beta, y_, incy); + n, kl, ku, (cuDataType*)&alpha, a_, lda, x_, incx, + (cuDataType*)&beta, y_, incy); }); }); return done; } #define GBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t 
incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -928,23 +928,23 @@ GBMV_LAUNCHER_USM(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, - T alpha, const T *x, int64_t incx, const T *y, int64_t incy, T *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, + T alpha, const T* x, int64_t incx, const T* y, int64_t incy, T* a, + int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; - cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType *)&alpha, x_, + cublas_native_named_func(func_name, func, err, handle, m, n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -952,9 +952,9 @@ inline sycl::event ger(const char *func_name, Func func, sycl::queue 
&queue, int } #define GER_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -968,35 +968,35 @@ GER_LAUNCHER_USM(c, std::complex, cublasZgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), 
n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1006,34 +1006,34 @@ HBMV_LAUNCHER_USM(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - 
auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1043,25 +1043,25 @@ HEMV_LAUNCHER_USM(std::complex, cublasZhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, int64_t lda, const std::vector& dependencies) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - 
onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_, lda); }); }); @@ -1069,9 +1069,9 @@ inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, upl } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -1082,24 +1082,24 @@ HER_LAUNCHER_USM(double, std::complex, cublasZher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t 
num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1107,9 +1107,9 @@ inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, up } #define HER2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -1120,34 +1120,34 @@ HER2_LAUNCHER_USM(std::complex, cublasZher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using cuDataType = typename 
CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define HPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -1158,24 +1158,24 @@ HPMV_LAUNCHER_USM(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, const std::vector &dependencies) { +inline sycl::event hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const 
ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, const std::vector& dependencies) { using cuScalarType = typename CudaEquivalentType::Type; using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuScalarType *)&alpha, + get_cublas_fill_mode(upper_lower), n, (cuScalarType*)&alpha, x_, incx, a_); }); }); @@ -1183,9 +1183,9 @@ inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, upl } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -1196,24 +1196,24 @@ HPR_LAUNCHER_USM(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event 
hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1221,9 +1221,9 @@ inline sycl::event hpr2(const char *func_name, Func func, sycl::queue &queue, up } #define HPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -1234,35 +1234,35 @@ HPR2_LAUNCHER_USM(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, 
int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, k, (cuDataType *)&alpha, - a_, lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, k, (cuDataType*)&alpha, + a_, lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define SBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, 
alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1273,34 +1273,34 @@ SBMV_LAUNCHER_USM(double, cublasDsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - lda, x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + lda, x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } #define SYMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE 
alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -1311,23 +1311,23 @@ SYMV_LAUNCHER_USM(double, cublasDsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_, lda); }); }); @@ -1335,9 +1335,9 @@ inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, upl } #define SYR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo 
upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -1350,24 +1350,24 @@ SYR_LAUNCHER_USM(std::complex, cublasZsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1375,9 +1375,9 @@ inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, up } #define SYR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - 
int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -1391,34 +1391,34 @@ SYR2_LAUNCHER_USM(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, a_, - x_, incx, (cuDataType *)&beta, y_, incy); + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, a_, + x_, incx, (cuDataType*)&beta, y_, incy); }); }); return done; } 
#define SPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -1429,23 +1429,23 @@ SPMV_LAUNCHER_USM(double, cublasDspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, a_); }); }); @@ -1453,8 +1453,8 @@ inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, 
upl } #define SPR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -1465,24 +1465,24 @@ SPR_LAUNCHER_USM(double, cublasDspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, - get_cublas_fill_mode(upper_lower), n, (cuDataType *)&alpha, x_, + get_cublas_fill_mode(upper_lower), n, (cuDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1490,9 +1490,9 @@ inline sycl::event spr2(const char 
*func_name, Func func, sycl::queue &queue, up } #define SPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -1503,21 +1503,21 @@ SPR2_LAUNCHER_USM(double, cublasDspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1528,9 +1528,9 @@ inline sycl::event tbmv(const 
char *func_name, Func func, sycl::queue &queue, up } #define TBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -1543,21 +1543,21 @@ TBMV_LAUNCHER_USM(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ 
-1568,9 +1568,9 @@ inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, up } #define TBSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -1583,20 +1583,20 @@ TBSV_LAUNCHER_USM(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1607,9 
+1607,9 @@ inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, up } #define TPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -1622,20 +1622,20 @@ TPMV_LAUNCHER_USM(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1646,9 +1646,9 @@ inline sycl::event tpsv(const char *func_name, Func 
func, sycl::queue &queue, up } #define TPSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -1661,20 +1661,20 @@ TPSV_LAUNCHER_USM(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1685,9 +1685,9 @@ inline sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, up } 
#define TRMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -1700,20 +1700,20 @@ TRMV_LAUNCHER_USM(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), @@ -1724,9 +1724,9 @@ inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, up } 
#define TRSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -1744,16 +1744,16 @@ namespace row_major { // Buffer APIs template -inline void gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "gemv", "for row_major layout"); } #define GEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy); \ } @@ -1765,16 +1765,16 @@ GEMV_LAUNCHER(std::complex, cublasZgemv) #undef GEMV_LAUNCHER template -inline void gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t 
incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, int64_t kl, int64_t ku, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "gbmv", "for row_major layout"); } #define GBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1786,15 +1786,15 @@ GBMV_LAUNCHER(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER template -inline void ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "ger", "for row_major layout"); } #define GER_LAUNCHER(EXT, TYPE, CUBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -1808,16 
+1808,16 @@ GER_LAUNCHER(c, std::complex, cublasZgerc) #undef GER_LAUNCHER template -inline void hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hbmv", "for row_major layout"); } #define HBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1827,16 +1827,16 @@ HBMV_LAUNCHER(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER template -inline void hemv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hemv", "for row_major layout"); } #define HEMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void 
hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -1846,15 +1846,15 @@ HEMV_LAUNCHER(std::complex, cublasZhemv) #undef HEMV_LAUNCHER template -inline void her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a, int64_t lda) { +inline void her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "her", "for row_major layout"); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -1865,16 +1865,16 @@ HER_LAUNCHER(double, std::complex, cublasZher) #undef HER_LAUNCHER template -inline void her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "her2", "for row_major layout"); } #define HER2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, 
int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -1885,16 +1885,16 @@ HER2_LAUNCHER(std::complex, cublasZher2) #undef HER2_LAUNCHER template -inline void hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "hpmv", "for row_major layout"); } #define HPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -1905,15 +1905,15 @@ HPMV_LAUNCHER(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER template -inline void hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - ScalarType alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &a) { +inline void hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + ScalarType alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& a) { throw unimplemented("blas", "hpr", "for row_major layout"); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo 
upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -1923,16 +1923,16 @@ HPR_LAUNCHER(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER template -inline void hpr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { throw unimplemented("blas", "hpr2", "for row_major layout"); } #define HPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -1942,16 +1942,16 @@ HPR2_LAUNCHER(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "sbmv", "for row_major layout"); } #define SBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - 
void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, \ beta, y, incy); \ } @@ -1962,16 +1962,16 @@ SBMV_LAUNCHER(double, cublasDsbmv) #undef SBMV_LAUNCHER template -inline void symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - T beta, sycl::buffer &y, int64_t incy) { +inline void symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + T beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "symv", "for row_major layout"); } #define SYMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, \ y, incy); \ } @@ -1982,14 +1982,14 @@ SYMV_LAUNCHER(double, cublasDsymv) #undef SYMV_LAUNCHER template -inline void syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, 
sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "syr", "for row_major layout"); } #define SYR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2001,16 +2001,16 @@ SYR_LAUNCHER(std::complex, cublasZsyr) #undef SYR_LAUNCHER template -inline void syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { throw unimplemented("blas", "syr2", "for row_major layout"); } #define SYR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ lda); \ } @@ -2024,16 +2024,16 @@ SYR2_LAUNCHER(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER template -inline void spmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(const char* func_name, Func func, sycl::queue& queue, uplo 
upper_lower, int64_t n, + T alpha, sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "spmv", "for row_major layout"); } #define SPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, \ incy); \ } @@ -2044,14 +2044,14 @@ SPMV_LAUNCHER(double, cublasDspmv) #undef SPMV_LAUNCHER template -inline void spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& a) { throw unimplemented("blas", "spr", "for row_major layout"); } #define SPR_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2061,16 +2061,16 @@ SPR_LAUNCHER(double, cublasDspr) #undef SPR_LAUNCHER template -inline void spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - T alpha, sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + T alpha, sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, 
+ sycl::buffer& a) { throw unimplemented("blas", "spr2", "for row_major layout"); } #define SPR2_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2080,15 +2080,15 @@ SPR2_LAUNCHER(double, cublasDspr2) #undef SPR2_LAUNCHER template -inline void tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tbmv", "for row_major layout"); } #define TBMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -2102,15 +2102,15 @@ TBMV_LAUNCHER(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER template -inline void tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx) { +inline void tbsv(const char* func_name, Func func, sycl::queue& queue, uplo 
upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tbsv", "for row_major layout"); } #define TBSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, \ x, incx); \ @@ -2124,15 +2124,15 @@ TBSV_LAUNCHER(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER template -inline void tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tpmv", "for row_major layout"); } #define TPMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -2145,15 +2145,15 @@ TPMV_LAUNCHER(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER template -inline void tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, - sycl::buffer &x, int64_t incx) { +inline void tpsv(const char* func_name, Func func, 
sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "tpsv", "for row_major layout"); } #define TPSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, \ incx); \ } @@ -2166,15 +2166,15 @@ TPSV_LAUNCHER(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER template -inline void trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "trmv", "for row_major layout"); } #define TRMV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -2187,15 +2187,15 @@ TRMV_LAUNCHER(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER template -inline void trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, sycl::buffer &a, int64_t lda, - sycl::buffer &x, int64_t incx) { +inline void trsv(const 
char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, sycl::buffer& a, int64_t lda, + sycl::buffer& x, int64_t incx) { throw unimplemented("blas", "trsv", "for row_major layout"); } #define TRSV_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, \ incx); \ } @@ -2210,17 +2210,17 @@ TRSV_LAUNCHER(std::complex, cublasZtrsv) // USM APIs template -inline sycl::event gemv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gemv", "for row_major layout"); } #define GEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, \ beta, y, incy, dependencies); \ } @@ -2232,18 
+2232,18 @@ GEMV_LAUNCHER_USM(std::complex, cublasZgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T *a, - int64_t lda, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, int64_t kl, int64_t ku, T alpha, const T* a, + int64_t lda, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gbmv", "for row_major layout"); } #define GBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2255,16 +2255,16 @@ GBMV_LAUNCHER_USM(std::complex, cublasZgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(const char *func_name, Func func, sycl::queue &queue, int64_t m, int64_t n, - T alpha, const T *x, int64_t incx, const T *y, int64_t incy, T *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event ger(const char* func_name, Func func, sycl::queue& queue, int64_t m, int64_t n, + T alpha, const T* x, int64_t incx, const T* y, int64_t incy, T* a, + int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "ger", "for row_major layout"); } #define 
GER_LAUNCHER_USM(EXT, TYPE, CUBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2278,17 +2278,17 @@ GER_LAUNCHER_USM(c, std::complex, cublasZgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hbmv", "for row_major layout"); } #define HBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2298,16 +2298,16 @@ HBMV_LAUNCHER_USM(std::complex, cublasZhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(const char 
*func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "hemv", "for row_major layout"); } #define HEMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2317,16 +2317,16 @@ HEMV_LAUNCHER_USM(std::complex, cublasZhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her", "for row_major layout"); } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + 
sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -2337,16 +2337,16 @@ HER_LAUNCHER_USM(double, std::complex, cublasZher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event her2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her2", "for row_major layout"); } #define HER2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -2357,16 +2357,16 @@ HER2_LAUNCHER_USM(std::complex, cublasZher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hpmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T 
beta, T* y, + int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "hpmv", "for row_major layout"); } #define HPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -2377,16 +2377,16 @@ HPMV_LAUNCHER_USM(std::complex, cublasZhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, const ScalarType alpha, const DataType *x, int64_t incx, - DataType *a, const std::vector &dependencies) { +inline sycl::event hpr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, const ScalarType alpha, const DataType* x, int64_t incx, + DataType* a, const std::vector& dependencies) { throw unimplemented("blas", "hpr", "for row_major layout"); } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, \ dependencies); \ } @@ -2397,16 +2397,16 @@ HPR_LAUNCHER_USM(double, std::complex, cublasZhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(const char 
*func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event hpr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { throw unimplemented("blas", "hpr2", "for row_major layout"); } #define HPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -2417,17 +2417,17 @@ HPR2_LAUNCHER_USM(std::complex, cublasZhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event sbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "sbmv", "for row_major layout"); } #define SBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo 
upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2438,16 +2438,16 @@ SBMV_LAUNCHER_USM(double, cublasDsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *a, int64_t lda, const T *x, int64_t incx, - T beta, T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, int64_t lda, const T* x, int64_t incx, + T beta, T* y, int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "symv", "for row_major layout"); } #define SYMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, \ incx, beta, y, incy, dependencies); \ } @@ -2458,16 +2458,16 @@ SYMV_LAUNCHER_USM(double, cublasDsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* 
a, int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "syr", "for row_major layout"); } #define SYR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, \ dependencies); \ } @@ -2480,16 +2480,16 @@ SYR_LAUNCHER_USM(std::complex, cublasZsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, int64_t lda, const std::vector &dependencies) { +inline sycl::event syr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "syr2", "for row_major layout"); } #define SYR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, lda, dependencies); \ } @@ -2503,16 +2503,16 @@ SYR2_LAUNCHER_USM(std::complex, cublasZsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(const char *func_name, Func func, sycl::queue 
&queue, uplo upper_lower, - int64_t n, T alpha, const T *a, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event spmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* a, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "spmv", "for row_major layout"); } #define SPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, \ beta, y, incy, dependencies); \ } @@ -2523,15 +2523,15 @@ SPMV_LAUNCHER_USM(double, cublasDspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, T* a, + const std::vector& dependencies) { throw unimplemented("blas", "spr", "for row_major layout"); } #define SPR_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, 
\ dependencies); \ } @@ -2542,16 +2542,16 @@ SPR_LAUNCHER_USM(double, cublasDspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - int64_t n, T alpha, const T *x, int64_t incx, const T *y, int64_t incy, - T *a, const std::vector &dependencies) { +inline sycl::event spr2(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + int64_t n, T alpha, const T* x, int64_t incx, const T* y, int64_t incy, + T* a, const std::vector& dependencies) { throw unimplemented("blas", "spr2", "for row_major layout"); } #define SPR2_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, \ incy, a, dependencies); \ } @@ -2562,17 +2562,17 @@ SPR2_LAUNCHER_USM(double, cublasDspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tbmv", "for row_major layout"); } #define TBMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, 
int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -2585,17 +2585,17 @@ TBMV_LAUNCHER_USM(std::complex, cublasZtbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, int64_t k, const T *a, - int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, int64_t k, const T* a, + int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tbsv", "for row_major layout"); } #define TBSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, \ a, lda, x, incx, dependencies); \ } @@ -2608,16 +2608,16 @@ TBSV_LAUNCHER_USM(std::complex, cublasZtbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(const char* func_name, Func func, 
sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpmv", "for row_major layout"); } #define TPMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -2630,16 +2630,16 @@ TPMV_LAUNCHER_USM(std::complex, cublasZtpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpsv", "for row_major layout"); } #define TPSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ x, incx, dependencies); \ } @@ -2652,16 +2652,16 @@ TPSV_LAUNCHER_USM(std::complex, cublasZtpsv) #undef TPSV_LAUNCHER_USM template -inline 
sycl::event trmv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trmv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "trmv", "for row_major layout"); } #define TRMV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } @@ -2674,16 +2674,16 @@ TRMV_LAUNCHER_USM(std::complex, cublasZtrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event trsv(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "trsv", "for row_major layout"); } #define TRSV_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, 
transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, \ lda, x, incx, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_level3.cpp b/src/blas/backends/cublas/cublas_level3.cpp index be634a15c..66aad9c6b 100644 --- a/src/blas/backends/cublas/cublas_level3.cpp +++ b/src/blas/backends/cublas/cublas_level3.cpp @@ -31,33 +31,33 @@ namespace column_major { // Buffer APIs template -inline void gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_operation(transb), m, n, k, 
(cuDataType*)&alpha, a_, + lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); } #define GEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ @@ -72,15 +72,15 @@ GEMM_LAUNCHER(std::complex, cublasZgemm) template -inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue &queue, +inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, T_C alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - T_C beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + T_C beta, sycl::buffer& c, int64_t ldc) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; using cuDataType_C = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { if (!verify_support(queue, sycl::aspect::fp16)) { throw oneapi::mkl::unimplemented( "blas", "sycl::half", "half is not supported by the device or the sycl compiler"); @@ -88,17 +88,17 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + 
onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, DT_C, - ldc, DT_C, CUBLAS_GEMM_DEFAULT); + get_cublas_operation(transb), m, n, k, (cuDataType_C*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (cuDataType_C*)&beta, c_, DT_C, ldc, + DT_C, CUBLAS_GEMM_DEFAULT); #else CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, @@ -110,9 +110,9 @@ inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::que } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_C beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_C beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, m, n, k, \ alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -123,40 +123,40 @@ GEMM_EX_LAUNCHER(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, CUD #undef GEMM_EX_LAUNCHER -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, 
float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for column_major layout"); } template -inline void symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); } #define SYMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, 
int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -169,33 +169,33 @@ SYMM_LAUNCHER(std::complex, cublasZsymm) #undef SYMM_LAUNCHER template -inline void hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); 
}); }); } #define HEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -205,31 +205,31 @@ HEMM_LAUNCHER(std::complex, cublasZhemm) #undef HEMM_LAUNCHER template -inline void syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &c, int64_t ldc) { +inline void syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, (cuDataType *)&beta, c_, + n, k, (cuDataType*)&alpha, a_, lda, (cuDataType*)&beta, c_, ldc); }); }); } #define SYRK_LAUNCHER(TYPE, 
CUBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -242,33 +242,33 @@ SYRK_LAUNCHER(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER template -inline void herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, ScalarType alpha, - sycl::buffer &a, int64_t lda, ScalarType beta, - sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, ScalarType beta, + sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, - c_, ldc); + n, k, (cuScalarType*)&alpha, a_, lda, (cuScalarType*)&beta, c_, + ldc); }); }); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void herk(sycl::queue 
&queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -279,34 +279,34 @@ HERK_LAUNCHER(std::complex, double, cublasZherk) #undef HERK_LAUNCHER template -inline void syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuDataType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + 
(cuDataType*)&beta, c_, ldc); }); }); } #define SYR2K_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -318,36 +318,36 @@ SYR2K_LAUNCHER(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, DataType alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, ScalarType beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, ScalarType beta, sycl::buffer& c, int64_t ldc) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, 
get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuScalarType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuScalarType*)&beta, c_, ldc); }); }); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -362,31 +362,31 @@ HER2K_LAUNCHER(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. 
template -inline void trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); }); }); } #define TRMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -398,31 +398,31 @@ TRMM_LAUNCHER(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER 
template -inline void trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb); }); }); } #define TRSM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -436,36 +436,36 @@ TRSM_LAUNCHER(std::complex, cublasZtrsm) // USM APIs template -inline sycl::event 
gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T *a, - int64_t lda, const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T* a, + int64_t lda, const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_operation(transb), m, n, k, (cuDataType*)&alpha, a_, + lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define GEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + 
int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -480,30 +480,30 @@ GEMM_LAUNCHER_USM(std::complex, cublasZgemm) template inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T_C alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_C beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T_C alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_C beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; using cuDataType_C = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUBLAS_ERROR_FUNC_SYNC(cublasGemmEx, err, handle, get_cublas_operation(transa), - get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (cuDataType_C *)&beta, c_, 
DT_C, - ldc, DT_C, CUBLAS_GEMM_DEFAULT); + get_cublas_operation(transb), m, n, k, (cuDataType_C*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (cuDataType_C*)&beta, c_, DT_C, ldc, + DT_C, CUBLAS_GEMM_DEFAULT); #else CUBLAS_ERROR_FUNC(cublasGemmEx, err, handle, get_cublas_operation(transa), get_cublas_operation(transb), m, n, k, (cuDataType_C *)&alpha, @@ -517,10 +517,10 @@ inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, \ CUDADATATYPE_C) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_C beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_C beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex_usm(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, \ m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -530,44 +530,44 @@ GEMM_EX_LAUNCHER_USM(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, #undef GEMM_EX_LAUNCHER_USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for column_major layout"); } template -inline sycl::event 
symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define SYMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, 
int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -580,36 +580,36 @@ SYMM_LAUNCHER_USM(std::complex, cublasZsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), - get_cublas_fill_mode(upper_lower), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, (cuDataType *)&beta, c_, ldc); + get_cublas_fill_mode(upper_lower), m, n, (cuDataType*)&alpha, + a_, lda, b_, ldb, (cuDataType*)&beta, c_, ldc); }); }); return done; } #define HEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) 
\ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -619,24 +619,24 @@ HEMM_LAUNCHER_USM(std::complex, cublasZhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType 
*)&alpha, a_, lda, (cuDataType *)&beta, c_, + n, k, (cuDataType*)&alpha, a_, lda, (cuDataType*)&beta, c_, ldc); }); }); @@ -644,9 +644,9 @@ inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, up } #define SYRK_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -659,37 +659,37 @@ SYRK_LAUNCHER_USM(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const ScalarType alpha, - const DataType *a, int64_t lda, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { + const DataType* a, int64_t lda, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = 
reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuScalarType *)&alpha, a_, lda, (cuScalarType *)&beta, - c_, ldc); + n, k, (cuScalarType*)&alpha, a_, lda, (cuScalarType*)&beta, c_, + ldc); }); }); return done; } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -700,37 +700,37 @@ HERK_LAUNCHER_USM(std::complex, double, cublasZherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuDataType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuDataType*)&beta, c_, ldc); }); }); return done; } #define SYR2K_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -742,39 +742,39 @@ SYR2K_LAUNCHER_USM(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const DataType alpha, - const DataType *a, int64_t lda, const DataType *b, int64_t ldb, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector 
&dependencies) { + const DataType* a, int64_t lda, const DataType* b, int64_t ldb, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; using cuScalarType = typename CudaEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - n, k, (cuDataType *)&alpha, a_, lda, b_, ldb, - (cuScalarType *)&beta, c_, ldc); + n, k, (cuDataType*)&alpha, a_, lda, b_, ldb, + (cuScalarType*)&beta, c_, ldc); }); }); return done; } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, 
ldb, beta, c, ldc, dependencies); \ } @@ -789,35 +789,35 @@ HER2K_LAUNCHER_USM(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); }); }); return done; } #define TRMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event 
trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -829,35 +829,35 @@ TRMM_LAUNCHER_USM(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler &sc) { + onemkl_cublas_host_task(cgh, queue, [=](CublasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cublasStatus_t err; cublas_native_named_func(func_name, func, err, handle, get_cublas_side_mode(left_right), get_cublas_fill_mode(upper_lower), get_cublas_operation(trans), - get_cublas_diag_type(unit_diag), m, n, (cuDataType *)&alpha, - a_, lda, b_, ldb); + get_cublas_diag_type(unit_diag), m, n, (cuDataType*)&alpha, a_, + lda, b_, ldb); }); }); return done; } #define TRSM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trsm(sycl::queue 
&queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -874,17 +874,17 @@ namespace row_major { // Buffer APIs template -inline void gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ @@ -899,17 +899,17 @@ GEMM_LAUNCHER(std::complex, cublasZgemm) template -inline void gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue &queue, +inline void 
gemm_ex(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, T_C alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - T_C beta, sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + T_C beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_C beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_C beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, m, n, k, \ alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -920,24 +920,24 @@ GEMM_EX_LAUNCHER(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, CUD #undef GEMM_EX_LAUNCHER -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemm", "for row_major layout"); } template -inline void symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void 
symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "symm", "for row_major layout"); } #define SYMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -950,17 +950,17 @@ SYMM_LAUNCHER(std::complex, cublasZsymm) #undef SYMM_LAUNCHER template -inline void hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "hemm", "for row_major layout"); } #define HEMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ 
hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, \ b, ldb, beta, c, ldc); \ } @@ -970,16 +970,16 @@ HEMM_LAUNCHER(std::complex, cublasZhemm) #undef HEMM_LAUNCHER template -inline void syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - T beta, sycl::buffer &c, int64_t ldc) { +inline void syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "syrk", "for row_major layout"); } #define SYRK_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -992,17 +992,17 @@ SYRK_LAUNCHER(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER template -inline void herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, ScalarType alpha, - sycl::buffer &a, int64_t lda, ScalarType beta, - sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, ScalarType beta, + sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "herk", "for row_major layout"); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE 
beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ beta, c, ldc); \ } @@ -1013,17 +1013,17 @@ HERK_LAUNCHER(std::complex, double, cublasZherk) #undef HERK_LAUNCHER template -inline void syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, +inline void syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "syr2k", "for row_major layout"); } #define SYR2K_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -1035,18 +1035,18 @@ SYR2K_LAUNCHER(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline void her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, DataType alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, ScalarType beta, 
sycl::buffer &c, int64_t ldc) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, ScalarType beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "her2k", "for row_major layout"); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, \ ldb, beta, c, ldc); \ } @@ -1061,16 +1061,16 @@ HER2K_LAUNCHER(std::complex, double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. 
template -inline void trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { throw unimplemented("blas", "trmm", "for row_major layout"); } #define TRMM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -1082,16 +1082,16 @@ TRMM_LAUNCHER(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER template -inline void trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline void trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { throw unimplemented("blas", "trsm", "for row_major layout"); } #define TRSM_LAUNCHER(TYPE, CUBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + 
int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, \ n, alpha, a, lda, b, ldb); \ } @@ -1105,18 +1105,18 @@ TRSM_LAUNCHER(std::complex, cublasZtrsm) // USM APIs template -inline sycl::event gemm(const char *func_name, Func func, sycl::queue &queue, transpose transa, - transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T *a, - int64_t lda, const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(const char* func_name, Func func, sycl::queue& queue, transpose transa, + transpose transb, int64_t m, int64_t n, int64_t k, T alpha, const T* a, + int64_t lda, const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1131,19 +1131,19 @@ GEMM_LAUNCHER_USM(std::complex, cublasZgemm) template inline sycl::event gemm_ex_usm(DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T_C alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_C beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + sycl::queue& queue, transpose transa, 
transpose transb, int64_t m, + int64_t n, int64_t k, T_C alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_C beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, CUDADATATYPE_A, CUDADATATYPE_B, \ CUDADATATYPE_C) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_C alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_C beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_C alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_C beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex_usm(CUDADATATYPE_A, CUDADATATYPE_B, CUDADATATYPE_C, queue, transa, transb, \ m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1153,26 +1153,26 @@ GEMM_EX_LAUNCHER_USM(sycl::half, sycl::half, sycl::half, CUDA_R_16F, CUDA_R_16F, #undef GEMM_EX_LAUNCHER_USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", "for row_major layout"); } template -inline sycl::event symm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, 
- const std::vector &dependencies) { +inline sycl::event symm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "symm", "for row_major layout"); } #define SYMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1185,18 +1185,18 @@ SYMM_LAUNCHER_USM(std::complex, cublasZsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(const char *func_name, Func func, sycl::queue &queue, side left_right, - uplo upper_lower, int64_t m, int64_t n, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event hemm(const char* func_name, Func func, sycl::queue& queue, side left_right, + uplo upper_lower, int64_t m, int64_t n, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "hemm", "for row_major layout"); } #define HEMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - 
const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, \ a, lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1206,16 +1206,16 @@ HEMM_LAUNCHER_USM(std::complex, cublasZhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event syrk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "syrk", "for row_major layout"); } #define SYRK_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -1228,18 +1228,18 @@ SYRK_LAUNCHER_USM(std::complex, cublasZsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event herk(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, 
int64_t k, const ScalarType alpha, - const DataType *a, int64_t lda, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { + const DataType* a, int64_t lda, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "herk", "for row_major layout"); } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, beta, c, ldc, dependencies); \ } @@ -1250,18 +1250,18 @@ HERK_LAUNCHER_USM(std::complex, double, cublasZherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, - transpose trans, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, + transpose trans, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syr2k", "for row_major layout"); } #define SYR2K_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t 
ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1273,19 +1273,19 @@ SYR2K_LAUNCHER_USM(std::complex, cublasZsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(const char *func_name, Func func, sycl::queue &queue, uplo upper_lower, +inline sycl::event her2k(const char* func_name, Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, const DataType alpha, - const DataType *a, int64_t lda, const DataType *b, int64_t ldb, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { + const DataType* a, int64_t lda, const DataType* b, int64_t ldb, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "her2k", "for row_major layout"); } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, CUBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, \ lda, b, ldb, beta, c, ldc, dependencies); \ } @@ -1300,17 +1300,17 @@ HER2K_LAUNCHER_USM(std::complex, 
double, cublasZher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trmm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trmm", "for row_major layout"); } #define TRMM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -1322,17 +1322,17 @@ TRMM_LAUNCHER_USM(std::complex, cublasZtrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(const char *func_name, Func func, sycl::queue &queue, side left_right, +inline sycl::event trsm(const char* func_name, Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { + T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trsm", "for row_major layout"); } #define TRSM_LAUNCHER_USM(TYPE, CUBLAS_ROUTINE) \ - sycl::event 
trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(#CUBLAS_ROUTINE, CUBLAS_ROUTINE, queue, left_right, upper_lower, trans, \ unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); \ } diff --git a/src/blas/backends/cublas/cublas_scope_handle.cpp b/src/blas/backends/cublas/cublas_scope_handle.cpp index dd12552fb..8bb1145fa 100644 --- a/src/blas/backends/cublas/cublas_scope_handle.cpp +++ b/src/blas/backends/cublas/cublas_scope_handle.cpp @@ -43,7 +43,7 @@ thread_local cublas_handle CublasScopedContextHandler::handle_helper cublas_handle{}; #endif -CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih) +CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih) : ih(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -73,8 +73,8 @@ CublasScopedContextHandler::~CublasScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -92,7 +92,7 @@ void ContextCallback(void *userData) { } } -cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) { +cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue& queue) { auto cudaDevice = ih.get_native_device(); CUresult cuErr; CUcontext desired; @@ -139,10 +139,10 @@ cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -CUstream 
CublasScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CublasScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context CublasScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context CublasScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/blas/backends/cublas/cublas_scope_handle.hpp b/src/blas/backends/cublas/cublas_scope_handle.hpp index d88124ac1..d17909cfb 100644 --- a/src/blas/backends/cublas/cublas_scope_handle.hpp +++ b/src/blas/backends/cublas/cublas_scope_handle.hpp @@ -85,19 +85,19 @@ the handle must be destroyed when the context goes out of scope. This will bind class CublasScopedContextHandler { CUcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMKL_PI_INTERFACE_REMOVED static thread_local cublas_handle handle_helper; #else static thread_local cublas_handle handle_helper; #endif - CUstream get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + CUstream get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~CublasScopedContextHandler() noexcept(false); /** @@ -107,7 +107,7 @@ class CublasScopedContextHandler { * @param queue sycl queue. * @return cublasHandle_t a handle to construct cublas routines */ - cublasHandle_t get_handle(const sycl::queue &queue); + cublasHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
template @@ -116,7 +116,7 @@ class CublasScopedContextHandler { return reinterpret_cast(cudaPtr); } - void wait_stream(const sycl::queue &queue) { + void wait_stream(const sycl::queue& queue) { cuStreamSynchronize(get_stream(queue)); } }; diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp index 20675c212..03c282aed 100644 --- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp +++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.cpp @@ -26,10 +26,10 @@ namespace cublas { thread_local cublas_handle CublasScopedContextHandler::handle_helper = cublas_handle{}; -CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih) +CublasScopedContextHandler::CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih) : interop_h(ih) {} -cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) { +cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue& queue) { sycl::device device = queue.get_device(); int current_device = interop_h.get_native_device(); CUstream streamId = get_stream(queue); @@ -64,7 +64,7 @@ cublasHandle_t CublasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -CUstream CublasScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CublasScopedContextHandler::get_stream(const sycl::queue& queue) { return interop_h.get_native_queue(); } diff --git a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp index c7ec3e520..9e1eb89e5 100644 --- a/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp +++ b/src/blas/backends/cublas/cublas_scope_handle_hipsycl.hpp @@ -61,13 +61,13 @@ the handle must be destroyed when the context goes out of scope. 
This will bind class CublasScopedContextHandler { sycl::interop_handle interop_h; static thread_local cublas_handle handle_helper; - sycl::context get_context(const sycl::queue &queue); - CUstream get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue& queue); + CUstream get_stream(const sycl::queue& queue); public: - CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CublasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); - cublasHandle_t get_handle(const sycl::queue &queue); + cublasHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. diff --git a/src/blas/backends/cublas/cublas_task.hpp b/src/blas/backends/cublas/cublas_task.hpp index b3887fcd4..08d5cf70e 100644 --- a/src/blas/backends/cublas/cublas_task.hpp +++ b/src/blas/backends/cublas/cublas_task.hpp @@ -58,7 +58,7 @@ namespace cublas { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { cgh.hipSYCL_enqueue_custom_operation([f, queue](sycl::interop_handle ih) { auto sc = CublasScopedContextHandler(queue, ih); f(sc); @@ -66,9 +66,9 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #else template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -78,7 +78,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #endif template -static inline void onemkl_cublas_host_task(H 
&cgh, sycl::queue queue, F f) { +static inline void onemkl_cublas_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/blas/backends/mkl_common/mkl_batch.cxx b/src/blas/backends/mkl_common/mkl_batch.cxx index 6358a3922..4bd9076b8 100644 --- a/src/blas/backends/mkl_common/mkl_batch.cxx +++ b/src/blas/backends/mkl_common/mkl_batch.cxx @@ -19,347 +19,347 @@ // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - std::int64_t stridex, sycl::buffer &y, int64_t incy, std::int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + std::int64_t stridex, sycl::buffer& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - std::int64_t stridex, sycl::buffer &y, int64_t incy, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + std::int64_t stridex, sycl::buffer& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, int64_t incy, std::int64_t stridey, std::int64_t batch_size) { blas_major::copy_batch(queue, 
n, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t 
incx, int64_t stride_x, float beta, sycl::buffer &y, int64_t incy, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, sycl::buffer, 1> &y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size); } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 
1> &x, int64_t incx, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, sycl::buffer &x, - int64_t incx, int64_t stride_x, sycl::buffer &c, int64_t ldc, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, 
int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size); } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, double beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void 
gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, - int64_t stride_b, std::complex beta, sycl::buffer, 1> &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - 
int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { throw unimplemented("blas", "gemm_batch", 
"unsupported dtype combination: int8_t, int8_t, float, float"); } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, 
b, ldb, stride_b, batch_size); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, 
int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& 
b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& 
ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { 
blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, 
int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); @@ -367,641 +367,641 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t incx, - std::int64_t stridex, float *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, 
dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::int64_t stridex, std::complex *y, int64_t incy, +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, float **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, double **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, int64_t *incx, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, y, incy, 
group_count, group_size, dependencies); } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { return blas_major::copy_batch(queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - int64_t stridex, float *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t 
incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t 
*incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::axpy_batch(queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float beta, float *y, int64_t incy, int64_t stride_y, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double 
beta, double *y, int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, 
n, alpha, a, lda, stride_a, x, incx, stride_x, beta, y, incy, stride_y, batch_size, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, const float **x, int64_t *incx, float *beta, - float **y, int64_t *incy, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, groupsize, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double *beta, double **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, groupsize, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const 
std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, groupsize, dependencies); } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::gemv_batch(queue, transa, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, groupsize, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const float *a, - int64_t lda, int64_t stride_a, const float *x, int64_t incx, - int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, const double *a, - int64_t lda, int64_t stride_a, const double *x, int64_t incx, - int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - 
int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, stride_a, x, incx, stride_x, c, ldc, stride_c, batch_size, dependencies); } -sycl::event 
dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, groupsize, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, double **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, groupsize, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, groupsize, dependencies); } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, 
int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, const std::complex **x, - int64_t *incx, std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::dgmm_batch(queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, groupsize, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *b, int64_t ldb, int64_t stride_b, double beta, double *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, const std::complex *b, int64_t ldb, - int64_t stride_b, std::complex beta, std::complex *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half 
alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, + int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const 
std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", "unsupported dtype combination: int8_t, int8_t, float, float"); } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose 
*transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, const std::complex **b, - int64_t *ldb, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, 
transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, 
ldc, group_count, groupsize, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", "unsupported dtype combination: int8_t, int8_t, float, float"); } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::gemm_batch(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, int64_t stride_a, float *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, 
transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, int64_t stride_a, double *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& 
queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - std::complex *b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, float *alpha, const float **a, - int64_t *lda, float **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, double *alpha, const double **a, - int64_t *lda, double **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, 
unit_diag, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, transpose *trans, - diag *unit_diag, int64_t *m, int64_t *n, std::complex *alpha, - const std::complex **a, int64_t *lda, std::complex **b, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::trsm_batch(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, int64_t stride_a, float beta, - float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, 
int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, int64_t stride_a, double beta, - double *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex beta, std::complex *c, +sycl::event syrk_batch(sycl::queue& queue, uplo 
upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - 
int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { return blas_major::syrk_batch(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double 
*b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { return blas_major::omatcopy_batch(queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event 
imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::imatcopy_batch(queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float 
alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* 
c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { return blas_major::omatadd_batch(queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } diff --git a/src/blas/backends/mkl_common/mkl_blas_backend.hxx b/src/blas/backends/mkl_common/mkl_blas_backend.hxx index 10e441bd7..ca0c036f1 100644 --- a/src/blas/backends/mkl_common/mkl_blas_backend.hxx +++ b/src/blas/backends/mkl_common/mkl_blas_backend.hxx @@ -19,1351 +19,1365 @@ /// level3, buffer -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, 
sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc); -void 
gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, + std::int64_t ldc); -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); -void 
gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - 
-void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc); - -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); 
- -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); - -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, 
std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, 
std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, 
double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); + +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb); +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - 
sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); // level 3, USM -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, value_or_pointer beta, - double *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, value_or_pointer beta, + double* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - value_or_pointer beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + value_or_pointer beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies = {}); + 
+sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, + value_or_pointer beta, bfloat16* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, const bfloat16 *b, - std::int64_t ldb, value_or_pointer beta, bfloat16 *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t 
k, value_or_pointer alpha, const std::int8_t *a, - std::int64_t lda, const std::int8_t *b, std::int64_t ldb, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const std::int8_t *a, - std::int64_t lda, const std::int8_t *b, std::int64_t ldb, - value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, value_or_pointer beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, value_or_pointer beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, + const std::int8_t* a, std::int64_t lda, const std::int8_t* b, std::int64_t ldb, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, value_or_pointer beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, value_or_pointer beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); + 
+sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, 
std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - value_or_pointer beta, float *c, std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, std::int64_t lda, + value_or_pointer beta, float* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, std::int64_t lda, - value_or_pointer beta, double *c, std::int64_t ldc, - const std::vector &dependencies = {}); +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, + value_or_pointer beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, 
std::int64_t lda, - value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const std::complex *a, - std::int64_t lda, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const std::complex *a, - std::int64_t lda, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, value_or_pointer beta, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, value_or_pointer beta, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); - -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const std::complex* a, + std::int64_t lda, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const std::complex* a, + std::int64_t lda, value_or_pointer beta, std::complex* c, + 
std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, value_or_pointer beta, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, value_or_pointer beta, double* c, + std::int64_t ldc, const std::vector& dependencies = {}); + +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - 
std::int64_t ldb, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose 
trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies = {}); + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); // level 2, buffer -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void 
gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void gbmv(sycl::queue& 
queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy); +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void 
geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - -void her(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex 
alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, 
+ sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); -void hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); +void hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer, 1>& x, 
std::int64_t incx, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); +void hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, 
std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); -void symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, std::int64_t lda); +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda); +void syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, 
float beta, - sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); -void spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); +void spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a); +void spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); +void spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, 
+void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer& a, 
std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - std::int64_t k, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + std::int64_t k, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx); +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo 
upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t n, + sycl::buffer, 
1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); // level 2, USM -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, value_or_pointer beta, - float *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, value_or_pointer beta, - double *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gemv(sycl::queue& 
queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, value_or_pointer beta, + float* y, std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t 
incx, value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, value_or_pointer> beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, + const std::complex* a, std::int64_t lda, const std::complex* x, std::int64_t incx, value_or_pointer> beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, 
std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hemv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo 
upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event her2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, const std::vector &dependencies = {}); - -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *a, const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, 
std::int64_t k, - value_or_pointer alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, std::int64_t k, - value_or_pointer alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, - value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event symv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, - value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event syr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - std::int64_t lda, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer 
alpha, - const float *a, const float *x, std::int64_t incx, value_or_pointer beta, - float *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spmv(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *a, const double *x, std::int64_t incx, value_or_pointer beta, - double *y, std::int64_t incy, const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, float *a, - const std::vector &dependencies = {}); - -sycl::event spr(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, double *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies = {}); - -sycl::event spr2(sycl::queue &queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, 
diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose 
trans, diag unit_diag, - std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const double *a, std::int64_t lda, double *x, 
std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); - -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies = {}); + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, float* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, const double* y, std::int64_t incy, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t 
incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hemv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event her(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& 
dependencies = {}); + +sycl::event her2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies = {}); + +sycl::event hpr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* a, const std::vector& dependencies = {}); + +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + 
+sycl::event sbmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, std::int64_t k, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event symv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, double* a, + std::int64_t lda, const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event syr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* a, const float* x, std::int64_t incx, + value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spmv(sycl::queue& queue, uplo 
upper_lower, std::int64_t n, + value_or_pointer alpha, const double* a, const double* x, + std::int64_t incx, value_or_pointer beta, double* y, std::int64_t incy, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, float* a, + const std::vector& dependencies = {}); + +sycl::event spr(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* a, const std::vector& dependencies = {}); + +sycl::event spr2(sycl::queue& queue, uplo upper_lower, std::int64_t n, + value_or_pointer alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event 
tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, 
transpose trans, diag unit_diag, + std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* 
a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); + +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies = {}); // level 1, buffer -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer 
&result, index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result, index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result, index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result, - index_base base=index_base::zero); +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result, + index_base base = index_base::zero); -void asum(sycl::queue &queue, 
std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); 
-void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, 
sycl::buffer, 1>& y, std::int64_t incy); -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result); +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t 
incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result); +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, double s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float s); -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s); +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s); +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - 
sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m); +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param); -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m); +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void 
scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, std::int64_t incx); -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy); +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy); // level 1, USM -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, 
const std::vector& dependencies = {}); -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies = {}); +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const 
std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies = {}); +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const 
std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies = {}); -sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, 
value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpy(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpy(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const float *x, - std::int64_t incx, value_or_pointer beta, float *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, const float* x, + std::int64_t incx, value_or_pointer beta, float* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, const double *x, - std::int64_t incx, value_or_pointer beta, double *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, value_or_pointer beta, double* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event axpby(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, - value_or_pointer> beta, 
std::complex *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event axpby(sycl::queue& queue, std::int64_t n, value_or_pointer> alpha, + const std::complex* x, std::int64_t incx, + value_or_pointer> beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies 
= {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies = {}); +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies = {}); -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies = {}); +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, - const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, + const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, 
const std::vector& dependencies = {}); -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies = {}); +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, value_or_pointer c, - value_or_pointer s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, value_or_pointer c, + value_or_pointer s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, value_or_pointer c, - value_or_pointer s, const std::vector &dependencies = {}); +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, value_or_pointer c, + value_or_pointer s, const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, std::int64_t incy, value_or_pointer c, value_or_pointer s, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, value_or_pointer c, value_or_pointer s, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, float* a, float* b, 
float* c, float* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies = {}); +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const float *param, - const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const float* param, + const std::vector& dependencies = {}); -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const double *param, - const std::vector &dependencies = {}); +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const double* param, + const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, value_or_pointer y1, - float *param, const std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, value_or_pointer y1, + float* param, const std::vector& dependencies = {}); -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, value_or_pointer y1, - double *param, const 
std::vector &dependencies = {}); +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, + value_or_pointer y1, double* param, + const std::vector& dependencies = {}); #define ONEMKL_DECLARE_SCAL(T, Ts) \ - sycl::event scal(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, T *x, \ - std::int64_t incx, const std::vector &dependencies = {}); + sycl::event scal(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, T* x, \ + std::int64_t incx, const std::vector& dependencies = {}); ONEMKL_DECLARE_SCAL(float, float) ONEMKL_DECLARE_SCAL(double, double) @@ -1371,1124 +1385,1134 @@ ONEMKL_DECLARE_SCAL(std::complex, std::complex) ONEMKL_DECLARE_SCAL(std::complex, std::complex) ONEMKL_DECLARE_SCAL(std::complex, float) ONEMKL_DECLARE_SCAL(std::complex, double) -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies = {}); +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies = {}); #undef ONEMKL_DECLARE_SCAL -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& 
dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies = {}); +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies = {}); // extensions, buffer -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void 
gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc); +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int8_t ao, - sycl::buffer &b, std::int64_t ldb, std::uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::int8_t ao, + sycl::buffer& b, std::int64_t ldb, std::uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int8_t ao, - sycl::buffer &b, std::int64_t ldb, std::int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::int8_t ao, + sycl::buffer& b, std::int64_t ldb, std::int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + 
sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::uint8_t ao, - sycl::buffer &b, std::int64_t ldb, std::int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::uint8_t ao, + sycl::buffer& b, std::int64_t ldb, std::int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::uint8_t ao, - sycl::buffer &b, std::int64_t ldb, std::uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, std::uint8_t ao, + sycl::buffer& b, std::int64_t ldb, std::uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); // extensions, USM -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, value_or_pointer beta, - float *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, value_or_pointer beta, + float* c, std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - std::int64_t n, std::int64_t k, value_or_pointer alpha, 
const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, value_or_pointer beta, - double *c, std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + std::int64_t n, std::int64_t k, value_or_pointer alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + value_or_pointer beta, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, value_or_pointer> beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, value_or_pointer> beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, + const std::complex* a, std::int64_t lda, const std::complex* b, std::int64_t ldb, value_or_pointer> beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const 
std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, value_or_pointer beta, std::int32_t *c, - std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, value_or_pointer beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, value_or_pointer beta, std::int32_t *c, - std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, value_or_pointer beta, + std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset 
offsetc, +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, - const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies = {}); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, + const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies = {}); // batch, buffer -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, float beta, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, sycl::buffer, 1> &c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t 
stride_a, std::complex beta, sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, 
std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, transpose 
trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, 
std::int64_t stridec, std::int64_t batch_size); -void dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, 
std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> 
&y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, 
std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + 
std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& c, std::int64_t ldc, std::int64_t 
stride_c, std::int64_t batch_size); -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer &c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size); -void omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &ab, std::int64_t lda, std::int64_t ldb, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, 
- std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb); +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); +void 
omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb); +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, std::int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb); +void 
imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, std::int64_t lda, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose 
transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // batch, usm -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, const float *alpha, - const float **a, const std::int64_t *lda, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, const double *alpha, - const double **a, const std::int64_t *lda, const double *beta, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, const uplo *upper_lower, const transpose *trans, - const std::int64_t *n, const 
std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, value_or_pointer beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, value_or_pointer alpha, const double *a, +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, const float* alpha, + const float** a, const std::int64_t* lda, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, const double* alpha, + const double** a, const std::int64_t* lda, const double* beta, double** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, const uplo* upper_lower, const transpose* trans, + const std::int64_t* n, 
const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, value_or_pointer alpha, const double* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, + const std::vector& dependencies = {}); +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, 
std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t 
batch_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const float **x, - const std::int64_t *incx, float **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const double **x, - const std::int64_t *incx, double **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex **x, - const std::int64_t *incx, std::complex **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event copy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex **x, - const std::int64_t *incx, std::complex **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float *c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const float** x, + const std::int64_t* incx, float** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const double** x, + const std::int64_t* incx, double** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex** x, + const std::int64_t* incx, 
std::complex** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event copy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex** x, + const std::int64_t* incx, std::complex** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, - 
std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const float **a, const std::int64_t *lda, - const float **x, const std::int64_t *incx, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const double **a, const std::int64_t *lda, - const double **x, const std::int64_t *incx, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const std::complex **a, - const std::int64_t *lda, const std::complex **x, - const std::int64_t *incx, std::complex **c, const std::int64_t *ldc, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event dgmm_batch(sycl::queue &queue, const side *left_right, const std::int64_t *m, - const std::int64_t *n, const std::complex **a, - const std::int64_t *lda, const std::complex **x, - const std::int64_t *incx, std::complex **c, const std::int64_t *ldc, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, value_or_pointer beta, float *y, std::int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double 
*a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, value_or_pointer beta, double *y, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const float** a, const std::int64_t* lda, + const float** x, const std::int64_t* incx, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const double** a, const std::int64_t* lda, + const double** x, const std::int64_t* incx, double** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* left_right, const std::int64_t* m, + const std::int64_t* n, const std::complex** a, + const std::int64_t* lda, const std::complex** x, + const std::int64_t* incx, std::complex** c, const std::int64_t* ldc, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event dgmm_batch(sycl::queue& queue, const side* 
left_right, const std::int64_t* m, + const std::int64_t* n, const std::complex** a, + const std::int64_t* lda, const std::complex** x, + const std::int64_t* incx, std::complex** c, const std::int64_t* ldc, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, value_or_pointer beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, value_or_pointer beta, double* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, - value_or_pointer> beta, std::complex *y, + value_or_pointer> beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemv_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const 
std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, - value_or_pointer> beta, std::complex *y, + value_or_pointer> beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, const float **x, const std::int64_t *incx, - const float *beta, float **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const double *alpha, const double **a, - const std::int64_t *lda, const double **x, const std::int64_t *incx, - const double *beta, double **y, const std::int64_t *incy, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - const std::complex **x, const std::int64_t *incx, - const std::complex *beta, std::complex **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event gemv_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - const std::complex **x, const std::int64_t *incx, - const std::complex *beta, std::complex **y, - const std::int64_t *incy, std::int64_t 
group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const double *alpha, - const double **x, const std::int64_t *incx, double **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const float *alpha, - const float **x, const std::int64_t *incx, float **y, - const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex *alpha, - const std::complex **x, const std::int64_t *incx, - std::complex **y, const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, const std::int64_t *n, const std::complex *alpha, - const std::complex **x, const std::int64_t *incx, - std::complex **y, const std::int64_t *incy, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + const std::int64_t* lda, const float** x, const std::int64_t* incx, + const float* beta, float** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, const 
double** x, const std::int64_t* incx, + const double* beta, double** y, const std::int64_t* incy, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + const std::complex** x, const std::int64_t* incx, + const std::complex* beta, std::complex** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event gemv_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + const std::complex** x, const std::int64_t* incx, + const std::complex* beta, std::complex** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const double* alpha, + const double** x, const std::int64_t* incx, double** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const float* alpha, + const float** x, const std::int64_t* incx, float** y, + const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex* alpha, + const std::complex** x, const std::int64_t* incx, + std::complex** y, const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, const std::int64_t* n, const std::complex* alpha, + const 
std::complex** x, const std::int64_t* incx, + std::complex** y, const std::int64_t* incy, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, value_or_pointer alpha, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, value_or_pointer> alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - value_or_pointer> alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const float **a, const std::int64_t *lda, - const float **b, const std::int64_t *ldb, const float *beta, float **c, - const 
std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const double *alpha, const double **a, const std::int64_t *lda, - const double **b, const std::int64_t *ldb, const double *beta, double **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex **b, - const std::int64_t *ldb, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const std::complex *alpha, const std::complex **a, - const std::int64_t *lda, const std::complex **b, - const std::int64_t *ldb, const std::complex *beta, - std::complex **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const sycl::half *alpha, const sycl::half **a, const std::int64_t *lda, - const sycl::half **b, const std::int64_t *ldb, const sycl::half *beta, - sycl::half **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, 
const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const sycl::half **a, const std::int64_t *lda, - const sycl::half **b, const std::int64_t *ldb, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const bfloat16 **a, const std::int64_t *lda, - const bfloat16 **b, const std::int64_t *ldb, const float *beta, - bfloat16 **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const bfloat16 **a, const std::int64_t *lda, - const bfloat16 **b, const std::int64_t *ldb, const float *beta, - float **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const std::int8_t **a, const std::int64_t *lda, - const std::int8_t **b, const std::int64_t *ldb, const float *beta, - std::int32_t **c, const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, const transpose *transa, const transpose *transb, - const std::int64_t *m, const std::int64_t *n, const std::int64_t *k, - const float *alpha, const std::int8_t **a, const std::int64_t *lda, - const std::int8_t **b, const 
std::int64_t *ldb, const float *beta, float **c, - const std::int64_t *ldc, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, value_or_pointer beta, float *c, std::int64_t ldc, - std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, value_or_pointer alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, value_or_pointer beta, double *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, + value_or_pointer> alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, + std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const float** a, const std::int64_t* lda, + const float** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* 
n, const std::int64_t* k, + const double* alpha, const double** a, const std::int64_t* lda, + const double** b, const std::int64_t* ldb, const double* beta, double** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex** b, + const std::int64_t* ldb, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const std::complex* alpha, const std::complex** a, + const std::int64_t* lda, const std::complex** b, + const std::int64_t* ldb, const std::complex* beta, + std::complex** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const sycl::half* alpha, const sycl::half** a, const std::int64_t* lda, + const sycl::half** b, const std::int64_t* ldb, const sycl::half* beta, + sycl::half** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const sycl::half** a, const std::int64_t* lda, + const sycl::half** b, const std::int64_t* ldb, const float* beta, 
float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const bfloat16** a, const std::int64_t* lda, + const bfloat16** b, const std::int64_t* ldb, const float* beta, bfloat16** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const bfloat16** a, const std::int64_t* lda, + const bfloat16** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const std::int8_t** a, const std::int64_t* lda, + const std::int8_t** b, const std::int64_t* ldb, const float* beta, + std::int32_t** c, const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, const transpose* transa, const transpose* transb, + const std::int64_t* m, const std::int64_t* n, const std::int64_t* k, + const float* alpha, const std::int8_t** a, const std::int64_t* lda, + const std::int8_t** b, const std::int64_t* ldb, const float* beta, float** c, + const std::int64_t* ldc, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, 
std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, + const float* a, std::int64_t lda, std::int64_t stride_a, const float* b, + std::int64_t ldb, std::int64_t stride_b, value_or_pointer beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, value_or_pointer alpha, + const double* a, std::int64_t lda, std::int64_t stride_a, const double* b, + std::int64_t ldb, std::int64_t stride_b, value_or_pointer beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer> beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer> beta, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer> beta, 
std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, sycl::half *c, std::int64_t ldc, + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, 
std::int64_t stride_a, - const bfloat16 *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, bfloat16 *c, std::int64_t ldc, + const bfloat16* a, std::int64_t lda, std::int64_t stride_a, + const bfloat16* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, bfloat16* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const bfloat16 *a, std::int64_t lda, std::int64_t stride_a, - const bfloat16 *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); + const bfloat16* a, std::int64_t lda, std::int64_t stride_a, + const bfloat16* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, std::int32_t *c, std::int64_t ldc, + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); 
-sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, value_or_pointer alpha, - const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, - value_or_pointer beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, + value_or_pointer beta, float* c, std::int64_t ldc, + std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t m, std::int64_t n, value_or_pointer alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, 
uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, float **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const double *alpha, const double **a, - 
const std::int64_t *lda, double **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event trsm_batch(sycl::queue &queue, const side *left_right, const uplo *upper_lower, - const transpose *trans, const diag *unit_diag, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *group_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + const std::int64_t* lda, float** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, double** b, const std::int64_t* ldb, + std::int64_t group_count, const 
std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event trsm_batch(sycl::queue& queue, const side* left_right, const uplo* upper_lower, + const transpose* trans, const diag* unit_diag, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* group_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, float *ab, std::int64_t lda, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, double *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event 
imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const float *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, std::int64_t lda, std::int64_t stride_a, value_or_pointer beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + 
const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::int64_t stride_b, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::int64_t stride_b, std::complex *c, - std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, float *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, double *b, - std::int64_t ldb, const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, 
std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); - -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + value_or_pointer> beta, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, float* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, double* b, + std::int64_t ldb, const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, 
const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies = {}); + +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, std::complex *b, +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); +sycl::event imatcopy(sycl::queue& queue, 
transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer alpha, double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer alpha, double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - value_or_pointer> alpha, std::complex *ab, +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + value_or_pointer> alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const float *a, std::int64_t lda, - value_or_pointer beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const float* a, std::int64_t lda, + value_or_pointer beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose 
transa, transpose transb, std::int64_t m, - std::int64_t n, value_or_pointer alpha, const double *a, std::int64_t lda, - value_or_pointer beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies = {}); +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, value_or_pointer alpha, const double* a, + std::int64_t lda, value_or_pointer beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, value_or_pointer> alpha, - const std::complex *a, std::int64_t lda, - value_or_pointer> beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, const float **a, - const std::int64_t *lda, float **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const 
std::int64_t *n, const double *alpha, const double **a, - const std::int64_t *lda, double **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event omatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - const std::complex **a, const std::int64_t *lda, - std::complex **b, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const float *alpha, float **ab, - const std::int64_t *lda, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const double *alpha, double **ab, - const std::int64_t *lda, const std::int64_t *ldb, - std::int64_t group_count, const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - std::complex **ab, const std::int64_t *lda, - const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); - -sycl::event imatcopy_batch(sycl::queue &queue, const transpose *trans, const std::int64_t *m, - const std::int64_t *n, const std::complex *alpha, - 
std::complex **ab, const std::int64_t *lda, - const std::int64_t *ldb, std::int64_t group_count, - const std::int64_t *groupsize, - const std::vector &dependencies = {}); + const std::complex* a, std::int64_t lda, + value_or_pointer> beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const float* alpha, const float** a, + const std::int64_t* lda, float** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, const double** a, + const std::int64_t* lda, double** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event omatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + const std::complex** a, const std::int64_t* lda, + std::complex** b, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const float* alpha, float** ab, + const std::int64_t* lda, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = 
{}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const double* alpha, double** ab, + const std::int64_t* lda, const std::int64_t* ldb, + std::int64_t group_count, const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + std::complex** ab, const std::int64_t* lda, + const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); + +sycl::event imatcopy_batch(sycl::queue& queue, const transpose* trans, const std::int64_t* m, + const std::int64_t* n, const std::complex* alpha, + std::complex** ab, const std::int64_t* lda, + const std::int64_t* ldb, std::int64_t group_count, + const std::int64_t* groupsize, + const std::vector& dependencies = {}); diff --git a/src/blas/backends/mkl_common/mkl_extensions.cxx b/src/blas/backends/mkl_common/mkl_extensions.cxx index 4672af5c7..171e2251a 100644 --- a/src/blas/backends/mkl_common/mkl_extensions.cxx +++ b/src/blas/backends/mkl_common/mkl_extensions.cxx @@ -19,341 +19,341 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose 
transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void 
gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo 
upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t 
stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { blas_major::imatcopy(queue, trans, m, n, alpha, 
ab, lda, ldb); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, 
int64_t ldc) { blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, 
bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, 
transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { return blas_major::gemm_bias(queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { 
+sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::gemmt(queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies) { return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { return 
blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { return blas_major::omatcopy(queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t 
ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { return 
blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { return blas_major::imatcopy(queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, 
const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { return blas_major::omatadd(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } diff --git a/src/blas/backends/mkl_common/mkl_level1.cxx b/src/blas/backends/mkl_common/mkl_level1.cxx index 85ccb0025..d109282d8 100644 --- a/src/blas/backends/mkl_common/mkl_level1.cxx +++ b/src/blas/backends/mkl_common/mkl_level1.cxx @@ -19,627 +19,627 @@ // Buffer APIs -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, 
sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::asum(queue, n, x, incx, result); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::asum(queue, n, x, incx, result); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::asum(queue, n, x, incx, result); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::asum(queue, n, x, incx, result); } -void axpy(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { blas_major::axpy(queue, n, alpha, x, incx, y, incy); } -void axpy(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { blas_major::axpy(queue, n, alpha, x, incx, y, incy); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::axpy(queue, n, alpha, x, incx, y, incy); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, 
- sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::axpy(queue, n, alpha, x, incx, y, incy); } -void axpby(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, 
std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { blas_major::copy(queue, n, x, incx, y, incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { blas_major::copy(queue, n, x, incx, y, incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::copy(queue, n, x, incx, y, incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::copy(queue, n, x, incx, y, incy); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { blas_major::dot(queue, n, x, incx, y, incy, result); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { blas_major::dot(queue, n, x, incx, y, incy, result); } -void sdsdot(sycl::queue &queue, std::int64_t n, float sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void sdsdot(sycl::queue& queue, std::int64_t n, float sb, sycl::buffer& x, + std::int64_t incx, 
sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { blas_major::dot(queue, n, x, incx, y, incy, result); } -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { blas_major::dotc(queue, n, x, incx, y, incy, result); } -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { blas_major::dotc(queue, n, x, incx, y, incy, result); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { blas_major::dotu(queue, n, x, incx, y, incy, result); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { blas_major::dotu(queue, n, x, incx, y, incy, result); } -void nrm2(sycl::queue &queue, 
std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::nrm2(queue, n, x, incx, result); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::nrm2(queue, n, x, incx, result); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::nrm2(queue, n, x, incx, result); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::nrm2(queue, n, x, incx, result); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, float c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { blas_major::rot(queue, n, x, incx, y, incy, c, s); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, double c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { blas_major::rot(queue, n, x, incx, y, incy, c, s); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, float c, float 
s) { blas_major::rot(queue, n, x, incx, y, incy, c, s); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, double c, double s) { blas_major::rot(queue, n, x, incx, y, incy, c, s); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { blas_major::rotg(queue, a, b, c, s); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { blas_major::rotg(queue, a, b, c, s); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { blas_major::rotg(queue, a, b, c, s); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { blas_major::rotg(queue, a, b, c, s); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { blas_major::rotm(queue, n, x, incx, y, incy, param); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + 
sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { blas_major::rotm(queue, n, x, incx, y, incy, param); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param) { blas_major::rotmg(queue, d1, d2, x1, y1, param); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param) { blas_major::rotmg(queue, d1, d2, x1, y1, param); } -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer, 1>& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t 
incx) { +void scal(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::scal(queue, n, alpha, x, incx); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { blas_major::swap(queue, n, x, incx, y, incy); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { blas_major::swap(queue, n, x, incx, y, incy); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::swap(queue, n, x, incx, y, incy); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::swap(queue, n, x, incx, y, incy); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::iamax(queue, n, x, incx, result); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::iamax(queue, n, x, incx, result); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, 
sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::iamax(queue, n, x, incx, result); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::iamax(queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::iamin(queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { blas_major::iamin(queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::iamin(queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { blas_major::iamin(queue, n, x, incx, result); } // USM APIs -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return blas_major::asum(queue, n, x, incx, result, dependencies); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex 
*x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return blas_major::asum(queue, n, x, incx, result, dependencies); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies) { return blas_major::asum(queue, n, x, incx, result, dependencies); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies) { return blas_major::asum(queue, n, x, incx, result, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + float* y, std::int64_t incy, const std::vector& dependencies) { return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const 
std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::axpy(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, 
std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::axpby(queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::copy(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::copy(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::copy(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - 
const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::copy(queue, n, x, incx, y, incy, dependencies); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return blas_major::dot(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return blas_major::dot(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, float sb, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, float sb, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return blas_major::sdsdot(queue, n, sb, x, incx, y, incy, result, dependencies); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return blas_major::dot(queue, n, x, 
incx, y, incy, result, dependencies); } -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { return blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { return blas_major::dotc(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { return blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { return blas_major::dotu(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const 
std::complex *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return blas_major::nrm2(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return blas_major::nrm2(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - float *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + float* result, const std::vector& dependencies) { return blas_major::nrm2(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - double *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + double* result, const std::vector& dependencies) { return blas_major::nrm2(queue, n, x, incx, result, dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { 
+sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { return blas_major::rot(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { return blas_major::rotg(queue, a, b, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies) { return blas_major::rotg(queue, a, b, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies) { return blas_major::rotg(queue, a, b, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, 
std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies) { return blas_major::rotg(queue, a, b, c, s, dependencies); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t incy, float *param, const std::vector &dependencies) { +sycl::event rotm(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, float* param, const std::vector& dependencies) { return blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, double *param, const std::vector &dependencies) { +sycl::event rotm(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, double* param, const std::vector& dependencies) { return blas_major::rotm(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const std::vector& dependencies) { return blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies) { return blas_major::rotmg(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, float* 
x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, float alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, float alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, double alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, double alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::scal(queue, n, alpha, x, incx, dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, float *x, std::int64_t incx, float *y, - std::int64_t 
incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, float* x, std::int64_t incx, float* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::swap(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, double *x, std::int64_t incx, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, double* x, std::int64_t incx, double* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::swap(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::swap(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::swap(queue, n, x, incx, y, incy, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { return blas_major::iamax(queue, n, x, incx, result, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, 
const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { return blas_major::iamax(queue, n, x, incx, result, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return blas_major::iamax(queue, n, x, incx, result, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return blas_major::iamax(queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { return blas_major::iamin(queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { return blas_major::iamin(queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return 
blas_major::iamin(queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return blas_major::iamin(queue, n, x, incx, result, dependencies); } diff --git a/src/blas/backends/mkl_common/mkl_level2.cxx b/src/blas/backends/mkl_common/mkl_level2.cxx index 83494be12..56fa591dc 100644 --- a/src/blas/backends/mkl_common/mkl_level2.cxx +++ b/src/blas/backends/mkl_common/mkl_level2.cxx @@ -19,844 +19,844 @@ // Buffer APIs -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) 
{ +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { blas_major::gbmv(queue, trans, m, n, kl, ku, 
alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, + std::int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, sycl::buffer& a, std::int64_t lda) { blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(sycl::queue& queue, std::int64_t m, 
std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t 
incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, 
std::int64_t incy) { +void hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } -void her(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::her(queue, uplo, n, alpha, x, incx, a, lda); } -void her(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::her(queue, uplo, n, alpha, x, incx, a, lda); } -void her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda); } -void her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { blas_major::her2(queue, uplo, n, alpha, x, incx, 
y, incy, a, lda); } -void hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy); } -void hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, - std::int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, + std::int64_t incx, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy); } -void hpr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { blas_major::hpr(queue, uplo, n, alpha, x, incx, a); } -void hpr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { blas_major::hpr(queue, uplo, n, alpha, x, incx, a); } -void hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { blas_major::hpr2(queue, uplo, n, 
alpha, x, incx, y, incy, a); } -void hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a); } -void sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void spmv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy); } -void spmv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t 
incx, double beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy); } -void spr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a) { +void spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a) { blas_major::spr(queue, uplo, n, alpha, x, incx, a); } -void spr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a) { +void spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a) { blas_major::spr(queue, uplo, n, alpha, x, incx, a); } -void spr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a) { blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a); } -void spr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a) { +void spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a) { blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a); } -void symv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +void symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, 
sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } -void symv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy) { +void symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy) { blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda) { +void syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda) { blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda); } -void syr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda) { +void syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda) { blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda); } -void syr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +void syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda); } -void syr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda) { +void syr2(sycl::queue& queue, uplo uplo, 
std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda) { blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, 
std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, std::int64_t k, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx); } -void tpmv(sycl::queue &queue, uplo uplo, 
transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx); } -void tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tpsv(queue, uplo, 
trans, diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, sycl::buffer, 1>& x, std::int64_t incx) { blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, 
std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, std::int64_t incx) { blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx); } // USM APIs -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return 
blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::gemv(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::gemv(queue, trans, m, n, 
alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, float alpha, const float *a, std::int64_t lda, - const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, float alpha, const float* a, std::int64_t lda, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, double alpha, const double *a, std::int64_t lda, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, double alpha, const double* a, std::int64_t lda, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& 
dependencies) { return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::gbmv(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::ger(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, 
std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::gerc(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const 
std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::geru(queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::hbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t 
incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event hemv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::hemv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event her(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return blas_major::her(queue, uplo, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return blas_major::her(queue, uplo, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { 
+sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event her2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::her2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *a, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* a, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return 
blas_major::hpmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { return blas_major::hpr(queue, uplo, n, alpha, x, incx, a, dependencies); } -sycl::event hpr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { return blas_major::hpr(queue, uplo, n, alpha, x, incx, a, dependencies); } -sycl::event hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event hpr2(sycl::queue &queue, uplo uplo, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(sycl::queue& queue, uplo uplo, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return blas_major::hpr2(queue, uplo, n, alpha, x, incx, y, incy, a, 
dependencies); } -sycl::event sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, float alpha, + const float* a, std::int64_t lda, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event sbmv(sycl::queue &queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event sbmv(sycl::queue& queue, uplo uplo, std::int64_t n, std::int64_t k, double alpha, + const double* a, std::int64_t lda, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { return blas_major::sbmv(queue, uplo, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event spmv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, 
const double* a, + const double* x, std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return blas_major::spmv(queue, uplo, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, const std::vector &dependencies) { +sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, const std::vector& dependencies) { return blas_major::spr(queue, uplo, n, alpha, x, incx, a, dependencies); } -sycl::event spr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, const std::vector &dependencies) { +sycl::event spr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, const std::vector& dependencies) { return blas_major::spr(queue, uplo, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, - const std::vector &dependencies) { +sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, + const std::vector& dependencies) { return blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event spr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, - const std::vector &dependencies) { +sycl::event spr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, + const std::vector& dependencies) { return blas_major::spr2(queue, uplo, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event 
symv(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event symv(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return blas_major::symv(queue, uplo, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::syr(queue, uplo, n, alpha, x, incx, a, lda, dependencies); } -sycl::event 
syr2(sycl::queue &queue, uplo uplo, std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event syr2(sycl::queue &queue, uplo uplo, std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(sycl::queue& queue, uplo uplo, std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return blas_major::syr2(queue, uplo, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { return 
blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbmv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + 
std::int64_t k, const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - std::int64_t k, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + std::int64_t k, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tbsv(queue, uplo, trans, diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, 
std::int64_t n, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpmv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } 
-sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event tpsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::tpsv(queue, uplo, trans, diag, n, a, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex 
*a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::trmv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const float* a, std::int64_t lda, float* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const double* a, std::int64_t lda, double* x, std::int64_t incx, + const std::vector& dependencies) { return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, 
std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, uplo uplo, transpose trans, diag diag, std::int64_t n, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, uplo uplo, transpose trans, diag diag, std::int64_t n, + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { return blas_major::trsv(queue, uplo, trans, diag, n, a, lda, x, incx, dependencies); } diff --git a/src/blas/backends/mkl_common/mkl_level3.cxx b/src/blas/backends/mkl_common/mkl_level3.cxx index d52c710f1..e67afc26d 100644 --- a/src/blas/backends/mkl_common/mkl_level3.cxx +++ b/src/blas/backends/mkl_common/mkl_level3.cxx @@ -19,501 +19,501 @@ // Buffer APIs -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose 
transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::half beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::half alpha, 
sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::half beta, + sycl::buffer& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, 
std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) 
{ +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, float beta, - sycl::buffer &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, float beta, + sycl::buffer& c, std::int64_t ldc) { blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, double beta, - sycl::buffer &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, double beta, + sycl::buffer& c, std::int64_t ldc) { blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(sycl::queue &queue, uplo upper_lower, 
transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } 
-void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t 
ldc) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, 
std::int64_t ldb) { blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, 
transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb); } // USM APIs -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const float 
*a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t 
m, +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, 
ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, std::int64_t lda, - const bfloat16 *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, std::int64_t lda, + const bfloat16* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::gemm(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, 
ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::symm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, 
+ std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::hemm(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, float beta, - float *c, std::int64_t ldc, const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return blas_major::syrk(queue, 
upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return blas_major::syrk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, 
std::int64_t n, - std::int64_t k, double alpha, const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::herk(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t 
lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::syr2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event 
her2k(sycl::queue &queue, uplo upper_lower, transpose trans, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return blas_major::her2k(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(sycl::queue 
&queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return blas_major::trmm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, 
double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return blas_major::trsm(queue, left_right, upper_lower, transa, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } diff --git a/src/blas/backends/netlib/netlib_batch.cxx b/src/blas/backends/netlib/netlib_batch.cxx index 7a2839dd4..5af30b80f 100644 --- a/src/blas/backends/netlib/netlib_batch.cxx +++ b/src/blas/backends/netlib/netlib_batch.cxx @@ -19,8 +19,8 @@ // Buffer APIs -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t 
stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -30,8 +30,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_ #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -41,8 +41,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64 #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -52,8 +52,8 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, #endif } -void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, - int64_t incx, int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, + int64_t incx, int64_t stridex, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); @@ -63,9 +63,9 @@ void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer #endif } -void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, 
int64_t incy, - int64_t stridey, int64_t batch_size) { +void axpy_batch(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, + int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -74,8 +74,8 @@ void axpy_batch(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +void axpy_batch(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -85,9 +85,9 @@ void axpy_batch(sycl::queue &queue, int64_t n, double alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -97,9 +97,9 @@ void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, int64_t stridex, - sycl::buffer, 1> &y, int64_t incy, int64_t stridey, +void axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, int64_t stridex, + sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); @@ -109,10 +109,10 @@ void axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, float 
alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, float beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, float beta, sycl::buffer& y, int64_t incy, + int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -121,11 +121,10 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, floa #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, double beta, - sycl::buffer &y, int64_t incy, int64_t stride_y, - int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, double beta, + sycl::buffer& y, int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -134,12 +133,11 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, doub #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &x, int64_t incx, - int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, - int64_t batch_size) { +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, sycl::buffer, 1>& y, 
+ int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -148,11 +146,11 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, #endif } -void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &x, - int64_t incx, int64_t stride_x, std::complex beta, - sycl::buffer, 1> &y, int64_t incy, int64_t stride_y, +void gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& x, int64_t incx, + int64_t stride_x, std::complex beta, + sycl::buffer, 1>& y, int64_t incy, int64_t stride_y, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); @@ -162,10 +160,10 @@ void gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, sycl::buffer& x, + int64_t incx, int64_t stride_x, sycl::buffer& c, int64_t ldc, + int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -174,10 +172,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &x, int64_t incx, int64_t stride_x, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void 
dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& x, int64_t incx, int64_t stride_x, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -186,10 +184,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); @@ -199,10 +197,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &x, int64_t incx, int64_t stride_x, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& x, int64_t incx, int64_t stride_x, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); @@ -212,11 +210,10 @@ void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t 
stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -225,11 +222,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, double beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -238,12 +234,11 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, - int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, 
sycl::buffer, 1>& b, int64_t ldb, + int64_t stride_b, std::complex beta, sycl::buffer, 1>& c, + int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -252,11 +247,11 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -266,10 +261,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -279,10 +274,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose 
transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -292,10 +287,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -305,10 +300,10 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int64_t stride_a, sycl::buffer &b, int64_t ldb, int64_t stride_b, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int64_t stride_a, sycl::buffer& b, int64_t 
ldb, int64_t stride_b, + float beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); @@ -318,9 +313,9 @@ void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -330,9 +325,9 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, int64_t stride_a, sycl::buffer &b, int64_t ldb, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, int64_t stride_a, sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -342,10 +337,10 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t 
ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -355,10 +350,10 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, +void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, int64_t stride_a, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); @@ -368,10 +363,9 @@ void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, float beta, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -380,9 +374,9 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, 
transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); @@ -392,11 +386,10 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, - int64_t batch_size) { +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, sycl::buffer, 1>& c, + int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -405,10 +398,10 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, +void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); @@ -418,9 +411,9 @@ void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer 
&b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -429,9 +422,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, f #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, int64_t stride_a, - sycl::buffer &b, int64_t ldb, int64_t stride_b, int64_t batch_size) { +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, int64_t stride_a, + sycl::buffer& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -440,9 +433,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, d #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stride_a, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); @@ -452,9 +445,9 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, int64_t stride_a, sycl::buffer, 1> &b, +void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, 
int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, int64_t stride_a, sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); @@ -464,8 +457,8 @@ void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -475,8 +468,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, f #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -486,8 +479,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, d #endif } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -497,8 +490,8 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy_batch(sycl::queue &queue, 
transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); @@ -508,10 +501,10 @@ void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - float beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + float beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -520,10 +513,10 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, int64_t stride_a, - double beta, sycl::buffer &b, int64_t ldb, int64_t stride_b, - sycl::buffer &c, int64_t ldc, int64_t stride_c, int64_t batch_size) { +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, int64_t stride_a, + double beta, sycl::buffer& b, int64_t ldb, int64_t stride_b, + sycl::buffer& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -532,11 +525,11 @@ void 
omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); @@ -546,11 +539,11 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 #endif } -void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, +void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, int64_t ldb, int64_t stride_b, - sycl::buffer, 1> &c, int64_t ldc, int64_t stride_c, + sycl::buffer, 1>& b, int64_t ldb, int64_t stride_b, + sycl::buffer, 1>& c, int64_t ldc, int64_t stride_c, int64_t batch_size) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); @@ -562,9 +555,9 @@ void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64 // USM APIs -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t *incx, - float **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const float** x, int64_t* incx, float** y, + int64_t* incy, int64_t group_count, int64_t* 
group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -573,9 +566,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const float **x, int64_t #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t *incx, - double **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const double** x, int64_t* incx, double** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -584,10 +577,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const double **x, int64_t #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, int64_t* incx, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -596,10 +588,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex **x, - int64_t *incx, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t* n, const std::complex** x, + int64_t* incx, std::complex** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ 
-608,10 +599,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t *n, const std::complex &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const float* x, int64_t incx, + std::int64_t stridex, float* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -620,10 +610,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const float *x, int64_t in #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t incx, - std::int64_t stridex, double *y, int64_t incy, std::int64_t stridey, - std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const double* x, int64_t incx, + std::int64_t stridex, double* y, int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -632,10 +621,10 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const double *x, int64_t i #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, - int64_t incx, std::int64_t stridex, std::complex *y, int64_t incy, - std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -644,10 +633,10 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex #endif } -sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex *x, - int64_t incx, std::int64_t stridex, std::complex *y, - int64_t incy, 
std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::int64_t stridex, std::complex* y, int64_t incy, + std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "copy_batch", "for column_major layout"); #endif @@ -656,9 +645,9 @@ sycl::event copy_batch(sycl::queue &queue, int64_t n, const std::complex #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float **x, - int64_t *incx, float **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, float* alpha, const float** x, int64_t* incx, + float** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -667,9 +656,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, float *alpha, const float #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const double **x, - int64_t *incx, double **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, double* alpha, const double** x, + int64_t* incx, double** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -678,10 +667,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, double *alpha, const doub #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector 
&dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -690,10 +679,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alph #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alpha, - const std::complex **x, int64_t *incx, std::complex **y, - int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t* n, std::complex* alpha, + const std::complex** x, int64_t* incx, std::complex** y, + int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -702,9 +691,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t *n, std::complex *alp #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float *x, - int64_t incx, int64_t stridex, float *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + int64_t stridex, float* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -713,9 +702,9 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, float alpha, const float * #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double *x, - int64_t incx, int64_t stridex, double *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event 
axpy_batch(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + int64_t stridex, double* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -724,10 +713,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, double alpha, const double #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -736,10 +725,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, int64_t stridex, - std::complex *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, int64_t stridex, + std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpy_batch", "for column_major layout"); #endif @@ -748,11 +737,10 @@ sycl::event axpy_batch(sycl::queue &queue, int64_t n, std::complex alpha #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, int64_t stride_a, - const float *x, int64_t incx, int64_t stride_x, float beta, float *y, - int64_t 
incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float beta, float* y, int64_t incy, int64_t stride_y, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -761,11 +749,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, int64_t stride_a, - const double *x, int64_t incx, int64_t stride_x, double beta, double *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, const double* x, + int64_t incx, int64_t stride_x, double beta, double* y, int64_t incy, + int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -774,12 +762,12 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, 
std::complex beta, std::complex* y, + int64_t incy, int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -788,12 +776,12 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, const std::complex *x, int64_t incx, - int64_t stride_x, std::complex beta, std::complex *y, - int64_t incy, int64_t stride_y, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose transa, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, const std::complex* x, int64_t incx, + int64_t stride_x, std::complex beta, std::complex* y, + int64_t incy, int64_t stride_y, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -802,11 +790,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose transa, int64_t m, int64_t #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - float *alpha, const float **a, int64_t *lda, const float **x, - int64_t *incx, float *beta, float **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, float* alpha, + const float** a, int64_t* lda, const float** x, int64_t* incx, float* beta, + float** y, int64_t* incy, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -815,11 +802,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, 
int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - double *alpha, const double **a, int64_t *lda, const double **x, - int64_t *incx, double *beta, double **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, double* alpha, + const double** a, int64_t* lda, const double** x, int64_t* incx, + double* beta, double** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -828,11 +814,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex *beta, - std::complex **y, int64_t *incy, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -841,12 +827,11 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event 
gemv_batch(sycl::queue& queue, transpose* transa, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, int64_t* lda, + const std::complex** x, int64_t* incx, std::complex* beta, + std::complex** y, int64_t* incy, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemv_batch", "for column_major layout"); #endif @@ -855,10 +840,10 @@ sycl::event gemv_batch(sycl::queue &queue, transpose *transa, int64_t *m, int64_ #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const float *a, int64_t lda, int64_t stride_a, const float *x, - int64_t incx, int64_t stride_x, float *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const float* a, + int64_t lda, int64_t stride_a, const float* x, int64_t incx, + int64_t stride_x, float* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -867,10 +852,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const double *a, int64_t lda, int64_t stride_a, const double *x, - int64_t incx, int64_t stride_x, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, const double* a, + int64_t lda, int64_t stride_a, const double* x, int64_t incx, + int64_t stride_x, double* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -879,11 +864,11 @@ sycl::event 
dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -892,11 +877,11 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *x, int64_t incx, int64_t stride_x, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, + const std::complex* a, int64_t lda, int64_t stride_a, + const std::complex* x, int64_t incx, int64_t stride_x, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -905,10 +890,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const float **a, int64_t *lda, const float **x, int64_t *incx, float **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event 
dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const float** a, int64_t* lda, const float** x, int64_t* incx, float** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -917,10 +902,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const double **a, int64_t *lda, const double **x, int64_t *incx, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const double** a, int64_t* lda, const double** x, int64_t* incx, double** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -929,11 +914,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -942,11 +926,10 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event dgmm_batch(sycl::queue &queue, side 
*left_right, int64_t *m, int64_t *n, - const std::complex **a, int64_t *lda, - const std::complex **x, int64_t *incx, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, + const std::complex** a, int64_t* lda, const std::complex** x, + int64_t* incx, std::complex** c, int64_t* ldc, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "dgmm_batch", "for column_major layout"); #endif @@ -955,11 +938,11 @@ sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const float **a, int64_t *lda, - const float **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const float** a, int64_t* lda, + const float** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -968,11 +951,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, double *alpha, const double **a, int64_t *lda, - const double **b, int64_t *ldb, double *beta, double **c, int64_t *ldc, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* 
k, double* alpha, const double** a, int64_t* lda, + const double** b, int64_t* ldb, double* beta, double** c, int64_t* ldc, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -981,12 +964,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -995,12 +978,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, std::complex *alpha, - const std::complex **a, int64_t *lda, - const std::complex **b, int64_t *ldb, std::complex *beta, - std::complex **c, int64_t *ldc, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, std::complex* alpha, + const std::complex** a, int64_t* lda, const std::complex** b, + int64_t* ldb, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* 
group_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1009,11 +992,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, sycl::half *alpha, const sycl::half **a, - int64_t *lda, const sycl::half **b, int64_t *ldb, sycl::half *beta, - sycl::half **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, sycl::half* alpha, const sycl::half** a, + int64_t* lda, const sycl::half** b, int64_t* ldb, sycl::half* beta, + sycl::half** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1022,11 +1005,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const sycl::half **a, int64_t *lda, - const sycl::half **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const sycl::half** a, int64_t* lda, + const sycl::half** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1035,11 +1018,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose 
*transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, float **c, int64_t *ldc, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, float** c, int64_t* ldc, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1048,11 +1031,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, - int64_t *n, int64_t *k, float *alpha, const std::int8_t **a, int64_t *lda, - const std::int8_t **b, int64_t *ldb, float *beta, std::int32_t **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, + int64_t* n, int64_t* k, float* alpha, const std::int8_t** a, int64_t* lda, + const std::int8_t** b, int64_t* ldb, float* beta, std::int32_t** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1061,11 +1044,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - int64_t stride_a, const float *b, int64_t ldb, int64_t stride_b, - float 
beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, int64_t stride_a, + const float* b, int64_t ldb, int64_t stride_b, float beta, float* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1074,11 +1057,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - int64_t stride_a, const double *b, int64_t ldb, int64_t stride_b, - double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, int64_t stride_a, + const double* b, int64_t ldb, int64_t stride_b, double beta, double* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1087,13 +1070,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, - int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, 
int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1102,13 +1084,12 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, int64_t stride_a, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex beta, std::complex *c, int64_t ldc, - int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, int64_t stride_a, const std::complex* b, int64_t ldb, + int64_t stride_b, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1117,11 +1098,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, - int64_t stride_a, const sycl::half *b, int64_t ldb, int64_t stride_b, - sycl::half beta, sycl::half *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, + 
int64_t stride_a, const sycl::half* b, int64_t ldb, int64_t stride_b, + sycl::half beta, sycl::half* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1130,11 +1111,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, int64_t stride_a, - const sycl::half *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, int64_t stride_a, + const sycl::half* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1143,11 +1124,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, float *c, +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ 
-1156,11 +1137,11 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const std::int8_t *a, int64_t lda, int64_t stride_a, - const std::int8_t *b, int64_t ldb, int64_t stride_b, float beta, - std::int32_t *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const std::int8_t* a, int64_t lda, int64_t stride_a, + const std::int8_t* b, int64_t ldb, int64_t stride_b, float beta, + std::int32_t* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_batch", "for column_major layout"); #endif @@ -1169,11 +1150,10 @@ sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, i #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, - int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1182,11 +1162,10 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, double alpha, - const double 
*a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, - int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1195,11 +1174,11 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1208,11 +1187,11 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, std::complex alpha, + const std::complex* a, int64_t 
lda, int64_t stride_a, + std::complex* b, int64_t ldb, int64_t stride_b, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1221,11 +1200,10 @@ sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, float *alpha, - const float **a, int64_t *lda, float **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, float* alpha, const float** a, + int64_t* lda, float** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1234,11 +1212,10 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, double *alpha, - const double **a, int64_t *lda, double **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, double* alpha, const double** a, + int64_t* lda, double** b, int64_t* ldb, int64_t group_count, + int64_t* groupsize, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1247,11 +1224,11 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, 
uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, int64_t *lda, - std::complex **b, int64_t *ldb, int64_t group_count, - int64_t *groupsize, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1260,12 +1237,11 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, transpose* trans, + diag* unit_diag, int64_t* m, int64_t* n, std::complex* alpha, + const std::complex** a, int64_t* lda, std::complex** b, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "trsm_batch", "for column_major layout"); #endif @@ -1274,10 +1250,10 @@ sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, float *alpha, const float **a, int64_t *lda, float *beta, - float **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, 
float* alpha, const float** a, int64_t* lda, float* beta, + float** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1286,10 +1262,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, double *alpha, const double **a, int64_t *lda, double *beta, - double **c, int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, double* alpha, const double** a, int64_t* lda, double* beta, + double** c, int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1298,11 +1274,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1311,11 +1287,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo 
*upper_lower, transpose *trans, int64_t *n, - int64_t *k, std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex *beta, std::complex **c, - int64_t *ldc, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, + int64_t* k, std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex* beta, std::complex** c, + int64_t* ldc, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1324,10 +1300,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, float *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, int64_t stride_a, float beta, + float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1336,10 +1312,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, double *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, int64_t stride_a, double beta, + double* c, int64_t 
ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1348,11 +1324,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex beta, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1361,11 +1337,11 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, int64_t stride_a, std::complex beta, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex beta, std::complex* c, + int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "syrk_batch", "for column_major layout"); #endif @@ -1374,10 +1350,10 @@ sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, in #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, 
int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, int64_t stride_a, float *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, int64_t stride_a, float* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1386,10 +1362,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, int64_t stride_a, double *b, int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, int64_t stride_a, double* b, int64_t ldb, int64_t stride_b, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1398,10 +1374,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1410,10 +1386,10 @@ sycl::event omatcopy_batch(sycl::queue &queue, 
transpose trans, int64_t m, int64 #endif } -sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stride_a, std::complex *b, int64_t ldb, int64_t stride_b, - int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stride_a, std::complex* b, int64_t ldb, int64_t stride_b, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy_batch", "for column_major layout"); #endif @@ -1422,9 +1398,9 @@ sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1433,9 +1409,9 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1444,10 +1420,10 @@ sycl::event 
imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1456,10 +1432,10 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); #endif @@ -1468,11 +1444,11 @@ sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64 #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, float alpha, const float *a, int64_t lda, int64_t stride_a, - float beta, const float *b, int64_t ldb, int64_t stride_b, float *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, float alpha, const float* a, int64_t lda, int64_t stride_a, + float beta, const float* b, int64_t ldb, int64_t stride_b, float* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major 
layout"); #endif @@ -1481,11 +1457,11 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, double alpha, const double *a, int64_t lda, int64_t stride_a, - double beta, const double *b, int64_t ldb, int64_t stride_b, double *c, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, double alpha, const double* a, int64_t lda, int64_t stride_a, + double beta, const double* b, int64_t ldb, int64_t stride_b, double* c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -1494,12 +1470,12 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, +sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, int64_t batch_size, - const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, int64_t batch_size, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif @@ -1508,12 +1484,12 @@ sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb #endif } -sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, std::complex alpha, const std::complex *a, 
+sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, std::complex alpha, const std::complex* a, int64_t lda, int64_t stride_a, std::complex beta, - const std::complex *b, int64_t ldb, int64_t stride_b, - std::complex *c, int64_t ldc, int64_t stride_c, - int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, int64_t ldb, int64_t stride_b, + std::complex* c, int64_t ldc, int64_t stride_c, + int64_t batch_size, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd_batch", "for column_major layout"); #endif diff --git a/src/blas/backends/netlib/netlib_common.hpp b/src/blas/backends/netlib/netlib_common.hpp index 3a69c70f8..18c08221d 100644 --- a/src/blas/backends/netlib/netlib_common.hpp +++ b/src/blas/backends/netlib/netlib_common.hpp @@ -79,19 +79,19 @@ inline CBLAS_OFFSET convert_to_cblas_offset(offset offsetc) { // host_task automatically uses run_on_host_intel if it is supported by the // compiler. Otherwise, it falls back to single_task. 
template -static inline auto host_task_internal(H &cgh, F f, int) -> decltype(cgh.host_task(f)) { +static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) { return cgh.host_task(f); } template -static inline void host_task_internal(H &cgh, F f, long) { +static inline void host_task_internal(H& cgh, F f, long) { #ifndef __SYCL_DEVICE_ONLY__ cgh.template single_task(f); #endif } template -static inline void host_task(H &cgh, F f) { +static inline void host_task(H& cgh, F f) { (void)host_task_internal(cgh, f, 0); } diff --git a/src/blas/backends/netlib/netlib_extensions.cxx b/src/blas/backends/netlib/netlib_extensions.cxx index 8e94cb880..d0c13ebbd 100644 --- a/src/blas/backends/netlib/netlib_extensions.cxx +++ b/src/blas/backends/netlib/netlib_extensions.cxx @@ -19,11 +19,10 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -32,11 +31,10 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + 
int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -45,11 +43,10 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -58,11 +55,10 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, sycl::buffer &a, - int64_t lda, uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, - float beta, sycl::buffer &c, int64_t ldc, - sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -71,9 +67,9 @@ void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset of #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, 
transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); @@ -83,9 +79,9 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); @@ -95,10 +91,10 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -107,10 +103,10 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void gemmt(sycl::queue &queue, uplo upper_lower, 
transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -119,8 +115,8 @@ void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose tra #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -129,8 +125,8 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float a #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -139,9 +135,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& 
b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -150,9 +146,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -161,9 +157,9 @@ void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -172,9 +168,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -183,9 +179,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void omatcopy2(sycl::queue 
&queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -194,9 +190,9 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::c #endif } -void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, int64_t ldb, +void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, int64_t ldb, std::int64_t strideb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); @@ -206,8 +202,8 @@ void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -216,8 +212,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float a #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", 
"imatcopy", "for column_major layout"); #endif @@ -226,8 +222,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -236,8 +232,8 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -246,9 +242,9 @@ void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::co #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, sycl::buffer &a, int64_t lda, float beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + float alpha, sycl::buffer& a, int64_t lda, float beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -257,9 +253,9 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, sycl::buffer &a, int64_t lda, double beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +void 
omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, sycl::buffer& a, int64_t lda, double beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -268,10 +264,10 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -280,10 +276,10 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, #endif } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &b, int64_t ldb, - sycl::buffer, 1> &c, int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& b, int64_t ldb, + sycl::buffer, 1>& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -294,11 +290,11 @@ void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const int8_t *a, int64_t lda, int8_t ao, const int8_t *b, 
int64_t ldb, - int8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -307,11 +303,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const int8_t *a, int64_t lda, int8_t ao, const uint8_t *b, int64_t ldb, - uint8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -320,11 +316,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const uint8_t *a, int64_t lda, uint8_t ao, const int8_t *b, int64_t ldb, - int8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + 
uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -333,11 +329,11 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, - offset offsetc, int64_t m, int64_t n, int64_t k, float alpha, - const uint8_t *a, int64_t lda, uint8_t ao, const uint8_t *b, int64_t ldb, - uint8_t bo, float beta, int32_t *c, int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm_bias", "for column_major layout"); #endif @@ -346,10 +342,10 @@ sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, - const float *b, int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -358,10 +354,10 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, 
transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, - const double *b, int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -370,11 +366,11 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -383,11 +379,11 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *b, - int64_t ldb, std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + 
std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemmt", "for column_major layout"); #endif @@ -396,9 +392,9 @@ sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transp #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, float *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, float* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -407,9 +403,9 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, double *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, double* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -418,10 +414,10 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -430,10 +426,10 @@ sycl::event 
omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy", "for column_major layout"); #endif @@ -442,9 +438,9 @@ sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, std::int64_t stridea, float *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, std::int64_t stridea, float* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -453,9 +449,9 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const double *a, int64_t lda, std::int64_t stridea, double *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, std::int64_t stridea, double* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -464,10 +460,10 @@ sycl::event omatcopy2(sycl::queue &queue, transpose 
trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -476,10 +472,10 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::int64_t stridea, std::complex *b, int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::int64_t stridea, std::complex* b, int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatcopy2", "for column_major layout"); #endif @@ -488,9 +484,9 @@ sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -499,9 +495,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, 
#endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -510,9 +506,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -521,9 +517,9 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "imatcopy", "for column_major layout"); #endif @@ -532,10 +528,10 @@ sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, float beta, const float *b, - int64_t ldb, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose 
transa, transpose transb, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, float beta, const float* b, + int64_t ldb, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -544,10 +540,10 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - double alpha, const double *a, int64_t lda, double beta, const double *b, - int64_t ldb, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, double beta, const double* b, + int64_t ldb, double* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -556,11 +552,11 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -569,11 +565,11 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 #endif } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - std::complex 
alpha, const std::complex *a, int64_t lda, - std::complex beta, const std::complex *b, int64_t ldb, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, const std::complex* b, int64_t ldb, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "omatadd", "for column_major layout"); #endif @@ -581,5 +577,3 @@ sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int6 throw unimplemented("blas", "omatadd", "for row_major layout"); #endif } - - diff --git a/src/blas/backends/netlib/netlib_level1.cpp b/src/blas/backends/netlib/netlib_level1.cpp index 59830db81..284adce75 100644 --- a/src/blas/backends/netlib/netlib_level1.cpp +++ b/src/blas/backends/netlib/netlib_level1.cpp @@ -43,7 +43,7 @@ inline double abs_val(std::complex val) { return std::abs(val.real()) + std::abs(val.imag()); } -int cblas_isamin(int n, const float *x, int incx) { +int cblas_isamin(int n, const float* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -65,7 +65,7 @@ int cblas_isamin(int n, const float *x, int incx) { return min_idx; } -int cblas_idamin(int n, const double *x, int incx) { +int cblas_idamin(int n, const double* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -87,7 +87,7 @@ int cblas_idamin(int n, const double *x, int incx) { return min_idx; } -int cblas_icamin(int n, const std::complex *x, int incx) { +int cblas_icamin(int n, const std::complex* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -109,7 +109,7 @@ int cblas_icamin(int n, const std::complex *x, int incx) { return min_idx; } -int cblas_izamin(int n, const std::complex *x, int incx) { +int cblas_izamin(int n, const std::complex* x, int incx) { if (n < 1 || incx < 1) { return 0; } @@ -131,7 +131,7 @@ int cblas_izamin(int n, const 
std::complex *x, int incx) { return min_idx; } -void cblas_csrot(const int n, std::complex *cx, const int incx, std::complex *cy, +void cblas_csrot(const int n, std::complex* cx, const int incx, std::complex* cy, const int incy, const float c, const float s) { if (n < 1) return; @@ -158,7 +158,7 @@ void cblas_csrot(const int n, std::complex *cx, const int incx, std::comp } } -void cblas_zdrot(const int n, std::complex *zx, const int incx, std::complex *zy, +void cblas_zdrot(const int n, std::complex* zx, const int incx, std::complex* zy, const int incy, const double c, const double s) { if (n < 1) return; @@ -185,8 +185,8 @@ void cblas_zdrot(const int n, std::complex *zx, const int incx, std::com } } -void cblas_crotg(std::complex *ca, const std::complex *cb, float *c, - std::complex *s) { +void cblas_crotg(std::complex* ca, const std::complex* cb, float* c, + std::complex* s) { if (std::abs(ca[0]) == 0) { c[0] = 0.0; s[0] = std::complex(1.0, 0.0); @@ -203,8 +203,8 @@ void cblas_crotg(std::complex *ca, const std::complex *cb, float * } } -void cblas_zrotg(std::complex *ca, const std::complex *cb, double *c, - std::complex *s) { +void cblas_zrotg(std::complex* ca, const std::complex* cb, double* c, + std::complex* s) { if (std::abs(ca[0]) == 0) { c[0] = 0.0; s[0] = std::complex(1.0, 0.0); diff --git a/src/blas/backends/netlib/netlib_level1.cxx b/src/blas/backends/netlib/netlib_level1.cxx index 9f953dc5b..5514a86c1 100644 --- a/src/blas/backends/netlib/netlib_level1.cxx +++ b/src/blas/backends/netlib/netlib_level1.cxx @@ -19,9 +19,9 @@ // Buffer APIs -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -31,9 +31,9 @@ void 
asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -43,9 +43,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -55,9 +55,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void asum(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -67,9 +67,9 @@ void asum(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void axpy(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -79,9 +79,9 @@ void axpy(sycl::queue &queue, int64_t n, float 
alpha, sycl::buffer &x, }); } -void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -91,34 +91,34 @@ void axpy(sycl::queue &queue, int64_t n, double alpha, sycl::buffer & }); } -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { - ::cblas_caxpy((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_caxpy((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void axpy(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void axpy(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zaxpy((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_zaxpy((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - 
float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -127,8 +127,8 @@ void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x #endif } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -137,9 +137,9 @@ void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer #endif } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -148,9 +148,9 @@ void axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -159,9 +159,9 @@ void axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void 
copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -171,9 +171,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -183,9 +183,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -195,9 +195,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void copy(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -207,9 +207,9 @@ void copy(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void 
dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -221,9 +221,9 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -235,9 +235,9 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -249,10 +249,10 @@ void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ 
-264,10 +264,10 @@ void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotc(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -279,10 +279,10 @@ void dotc(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -294,10 +294,10 @@ void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &result) { - queue.submit([&](sycl::handler &cgh) { +void dotu(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -309,9 +309,9 @@ void dotu(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, 
int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -320,9 +320,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.template get_access(cgh); auto accessor_result = result.template get_access(cgh); host_task(cgh, [=]() { @@ -331,9 +331,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t in }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -342,9 +342,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamin(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -353,9 +353,9 @@ void iamin(sycl::queue &queue, int64_t n, sycl::buffer, 1> }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + 
sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -364,9 +364,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -375,9 +375,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t in }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -386,9 +386,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void iamax(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -397,9 +397,9 @@ void iamax(sycl::queue &queue, int64_t n, sycl::buffer, 1> }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { 
auto accessor_x = x.template get_access(cgh); auto accessor_result = result.template get_access(cgh); host_task(cgh, [=]() { @@ -409,9 +409,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -421,9 +421,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -433,9 +433,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void nrm2(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_result = result.get_access(cgh); host_task(cgh, [=]() { @@ -445,9 +445,9 @@ void nrm2(sycl::queue &queue, int64_t n, sycl::buffer, 1> & }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, float c, float s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, float c, float s) { + queue.submit([&](sycl::handler& cgh) { 
auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -457,9 +457,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, double c, double s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, double c, double s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -469,9 +469,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, float c, float s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, float c, float s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -482,9 +482,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, }); } -void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, double c, double s) { - queue.submit([&](sycl::handler &cgh) { +void rot(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, double c, double s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -495,9 +495,9 @@ void rot(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, 
+ sycl::buffer& c, sycl::buffer& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -509,9 +509,9 @@ void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer }); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -523,10 +523,10 @@ void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -538,10 +538,10 @@ void rotg(sycl::queue &queue, sycl::buffer, 1> &a, }); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { - queue.submit([&](sycl::handler &cgh) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -553,9 +553,9 @@ void rotg(sycl::queue &queue, sycl::buffer, 1> &a, }); } -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, 
sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_param = param.get_access(cgh); @@ -566,9 +566,9 @@ void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_param = param.get_access(cgh); @@ -579,9 +579,9 @@ void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, float y1, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, float y1, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_d1 = d1.get_access(cgh); auto accessor_d2 = d2.get_access(cgh); auto accessor_x1 = x1.get_access(cgh); @@ -593,9 +593,9 @@ void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, double y1, sycl::buffer ¶m) { - queue.submit([&](sycl::handler &cgh) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, double y1, sycl::buffer& param) { + queue.submit([&](sycl::handler& cgh) { auto accessor_d1 = d1.get_access(cgh); auto accessor_d2 = d2.get_access(cgh); auto accessor_x1 = x1.get_access(cgh); @@ -607,8 +607,8 @@ void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx) { + 
queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_sscal((const int)n, (const float)alpha, accessor_x.GET_MULTI_PTR, @@ -617,8 +617,8 @@ void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, }); } -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_dscal((const int)n, (const double)alpha, accessor_x.GET_MULTI_PTR, @@ -627,20 +627,20 @@ void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer & }); } -void scal(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { - ::cblas_cscal((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_cscal((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)std::abs(incx)); }); }); } -void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, float alpha, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_csscal((const int)n, (const float)alpha, accessor_x.GET_MULTI_PTR, @@ -649,20 +649,20 @@ void scal(sycl::queue &queue, int64_t n, float alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void scal(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& 
cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zscal((const int)n, (const void *)&alpha, accessor_x.GET_MULTI_PTR, + ::cblas_zscal((const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)std::abs(incx)); }); }); } -void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer, 1> &x, +void scal(sycl::queue& queue, int64_t n, double alpha, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { ::cblas_zdscal((const int)n, (const double)alpha, accessor_x.GET_MULTI_PTR, @@ -671,9 +671,9 @@ void scal(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { - queue.submit([&](sycl::handler &cgh) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_result = result.get_access(cgh); @@ -685,9 +685,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -697,9 +697,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) 
{ + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -709,9 +709,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t inc }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -721,9 +721,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x }); } -void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void swap(sycl::queue& queue, int64_t n, sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { @@ -735,9 +735,9 @@ void swap(sycl::queue &queue, int64_t n, sycl::buffer, 1> & // USM APIs -sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -748,9 +748,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies) { - auto done = 
queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -761,9 +761,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -774,9 +774,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event asum(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -787,9 +787,9 @@ sycl::event asum(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, float* y, + int64_t 
incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -801,9 +801,9 @@ sycl::event axpy(sycl::queue &queue, int64_t n, float alpha, const float *x, int return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -816,41 +816,41 @@ sycl::event axpy(sycl::queue &queue, int64_t n, double alpha, const double *x, i return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_caxpy((const int)n, (const void *)&alpha, x, (const int)incx, y, + ::cblas_caxpy((const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy); }); }); return done; } -sycl::event axpy(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) 
{ +sycl::event axpy(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zaxpy((const int)n, (const void *)&alpha, x, (const int)incx, y, + ::cblas_zaxpy((const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy); }); }); return done; } -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -859,9 +859,9 @@ sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, in #endif } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -870,10 +870,10 @@ sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, #endif } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, 
int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -882,10 +882,10 @@ sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "axpby", "for column_major layout"); #endif @@ -894,9 +894,9 @@ sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, #endif } -sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -907,9 +907,9 @@ sycl::event copy(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -920,10 +920,10 @@ sycl::event 
copy(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -934,10 +934,10 @@ sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event copy(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -948,9 +948,9 @@ sycl::event copy(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -962,9 +962,9 @@ sycl::event dot(sycl::queue &queue, int64_t n, const float 
*x, int64_t incx, con return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, const double *y, - int64_t incy, double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const double* x, int64_t incx, const double* y, + int64_t incy, double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -976,9 +976,9 @@ sycl::event dot(sycl::queue &queue, int64_t n, const double *x, int64_t incx, co return done; } -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -990,10 +990,10 @@ sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, con return done; } -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1005,10 +1005,10 @@ sycl::event 
dotc(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotc(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1020,10 +1020,10 @@ sycl::event dotc(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1035,10 +1035,10 @@ sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - const std::complex *y, int64_t incy, std::complex *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event dotu(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + const std::complex* y, int64_t incy, std::complex* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1050,9 +1050,9 @@ sycl::event dotu(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1063,9 +1063,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const float *x, int64_t incx, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1076,9 +1076,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const double *x, int64_t incx, return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1089,9 +1089,9 @@ sycl::event iamin(sycl::queue &queue, 
int64_t n, const std::complex *x, i return done; } -sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamin(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1102,9 +1102,9 @@ sycl::event iamin(sycl::queue &queue, int64_t n, const std::complex *x, return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const float* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1115,9 +1115,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const float *x, int64_t incx, i return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, int64_t *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const double* x, int64_t incx, int64_t* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1128,9 +1128,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const double *x, int64_t incx, return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const 
std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1141,9 +1141,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - int64_t *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event iamax(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + int64_t* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1154,9 +1154,9 @@ sycl::event iamax(sycl::queue &queue, int64_t n, const std::complex *x, return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, float *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const float* x, int64_t incx, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1167,9 +1167,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const float *x, int64_t incx, fl return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, double *result, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const double* x, int64_t incx, double* 
result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1180,9 +1180,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const double *x, int64_t incx, d return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - float *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + float* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1193,9 +1193,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, in return done; } -sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, int64_t incx, - double *result, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event nrm2(sycl::queue& queue, int64_t n, const std::complex* x, int64_t incx, + double* result, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1206,9 +1206,9 @@ sycl::event nrm2(sycl::queue &queue, int64_t n, const std::complex *x, i return done; } -sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - float c, float s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float c, float s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1221,9 +1221,9 @@ sycl::event rot(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, return done; } -sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double c, double s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double c, double s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1236,10 +1236,10 @@ sycl::event rot(sycl::queue &queue, int64_t n, double *x, int64_t incx, double * return done; } -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, float c, float s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, float c, float s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1252,10 +1252,10 @@ sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t i return done; } -sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, double c, double s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rot(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, double c, double s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t 
num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1268,9 +1268,9 @@ sycl::event rot(sycl::queue &queue, int64_t n, std::complex *x, int64_t return done; } -sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, float* a, float* b, float* c, float* s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1280,9 +1280,9 @@ sycl::event rotg(sycl::queue &queue, float *a, float *b, float *c, float *s, return done; } -sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, double* a, double* b, double* c, double* s, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1292,9 +1292,9 @@ sycl::event rotg(sycl::queue &queue, double *a, double *b, double *c, double *s, return done; } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, float *c, - std::complex *s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, float* c, + std::complex* s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1304,9 +1304,9 @@ sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex return done; } -sycl::event rotg(sycl::queue 
&queue, std::complex *a, std::complex *b, double *c, - std::complex *s, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, double* c, + std::complex* s, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1316,9 +1316,9 @@ sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotm(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + float* param, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1330,9 +1330,9 @@ sycl::event rotm(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y return done; } -sycl::event rotm(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - double *param, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotm(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + double* param, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1344,9 +1344,9 @@ sycl::event rotm(sycl::queue &queue, int64_t n, double *x, int64_t incx, double return done; } -sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, float *param, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotmg(sycl::queue& queue, float* d1, float* d2, float* x1, float y1, float* param, + const 
std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1357,9 +1357,9 @@ sycl::event rotmg(sycl::queue &queue, float *d1, float *d2, float *x1, float y1, return done; } -sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double y1, double *param, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event rotmg(sycl::queue& queue, double* d1, double* d2, double* x1, double y1, double* param, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1370,9 +1370,9 @@ sycl::event rotmg(sycl::queue &queue, double *d1, double *d2, double *x1, double return done; } -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1384,9 +1384,9 @@ sycl::event scal(sycl::queue &queue, int64_t n, float alpha, float *x, int64_t i return done; } -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); 
@@ -1398,23 +1398,23 @@ sycl::event scal(sycl::queue &queue, int64_t n, double alpha, double *x, int64_t return done; } -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cscal((const int)n, (const void *)&alpha, x, (const int)std::abs(incx)); + ::cblas_cscal((const int)n, (const void*)&alpha, x, (const int)std::abs(incx)); }); }); return done; } -sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, float alpha, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1426,23 +1426,23 @@ sycl::event scal(sycl::queue &queue, int64_t n, float alpha, std::complex return done; } -sycl::event scal(sycl::queue &queue, int64_t n, std::complex alpha, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, std::complex alpha, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zscal((const int)n, (const void 
*)&alpha, x, (const int)std::abs(incx)); + ::cblas_zscal((const int)n, (const void*)&alpha, x, (const int)std::abs(incx)); }); }); return done; } -sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event scal(sycl::queue& queue, int64_t n, double alpha, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1454,10 +1454,10 @@ sycl::event scal(sycl::queue &queue, int64_t n, double alpha, std::complex &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1470,9 +1470,9 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 return done; } -sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, float* x, int64_t incx, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1483,9 +1483,9 @@ sycl::event swap(sycl::queue &queue, int64_t n, float *x, int64_t incx, float *y return done; } -sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double *y, int64_t incy, - const std::vector 
&dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, double* x, int64_t incx, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1496,10 +1496,10 @@ sycl::event swap(sycl::queue &queue, int64_t n, double *x, int64_t incx, double return done; } -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1510,10 +1510,10 @@ sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t return done; } -sycl::event swap(sycl::queue &queue, int64_t n, std::complex *x, int64_t incx, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event swap(sycl::queue& queue, int64_t n, std::complex* x, int64_t incx, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); diff --git a/src/blas/backends/netlib/netlib_level2.cxx b/src/blas/backends/netlib/netlib_level2.cxx index 156ed133b..8e8d74446 100644 --- a/src/blas/backends/netlib/netlib_level2.cxx +++ b/src/blas/backends/netlib/netlib_level2.cxx @@ -19,10 +19,10 @@ // Buffer APIs -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, 
int64_t kl, int64_t ku, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -36,10 +36,10 @@ void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -53,46 +53,46 @@ void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = 
a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, + (const int)kl, (const int)ku, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, + (const int)kl, (const int)ku, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, 
sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -105,10 +105,10 @@ void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -121,44 +121,44 @@ void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alph }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, 
(const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -170,10 +170,10 @@ void ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - 
queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -185,142 +185,142 @@ void ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, sycl::buffer alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void geru(sycl::queue &queue, 
int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { - ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, + ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hbmv(sycl::queue& 
queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, 
std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_x.GET_MULTI_PTR, (const int)incx, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_x.GET_MULTI_PTR, (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t 
incx, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -331,10 +331,10 @@ void her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, }); } -void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -345,78 +345,78 @@ void her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { ::cblas_cher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void her2(sycl::queue &queue, uplo upper_lower, 
int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { ::cblas_zher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_a.GET_MULTI_PTR, (const int)lda); }); }); } -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_chpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const void*)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, - int64_t incx, std::complex beta, 
sycl::buffer, 1> &y, +void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, + int64_t incx, std::complex beta, sycl::buffer, 1>& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, - (const int)incx, (const void *)&beta, accessor_y.GET_MULTI_PTR, + (const void*)&alpha, accessor_ap.GET_MULTI_PTR, accessor_x.GET_MULTI_PTR, + (const int)incx, (const void*)&beta, accessor_y.GET_MULTI_PTR, (const int)incy); }); }); } -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -427,10 +427,10 @@ void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, }); } -void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -441,42 +441,42 @@ void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex 
alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { ::cblas_chpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_ap.GET_MULTI_PTR); }); }); } -void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &ap) { - queue.submit([&](sycl::handler &cgh) { +void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, + (const void*)&alpha, accessor_x.GET_MULTI_PTR, (const int)incx, accessor_y.GET_MULTI_PTR, (const int)incy, accessor_ap.GET_MULTI_PTR); }); }); } -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t 
lda, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -489,10 +489,10 @@ void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alph }); } -void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -505,10 +505,10 @@ void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alp }); } -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &ap, - sycl::buffer &x, int64_t incx, float beta, sycl::buffer &y, +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& ap, + sycl::buffer& x, int64_t incx, float beta, sycl::buffer& y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -521,10 +521,10 @@ void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - sycl::buffer &ap, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + sycl::buffer& ap, sycl::buffer& x, int64_t 
incx, double beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -537,9 +537,9 @@ void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, }); } -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -550,9 +550,9 @@ void spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buf }); } -void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_ap = ap.get_access(cgh); host_task(cgh, [=]() { @@ -563,9 +563,9 @@ void spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::bu }); } -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); @@ -577,9 +577,9 @@ void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); 
} -void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &ap) { - queue.submit([&](sycl::handler &cgh) { +void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& ap) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_ap = ap.get_access(cgh); @@ -591,10 +591,10 @@ void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, float beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, float beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -607,10 +607,10 @@ void symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &a, - int64_t lda, sycl::buffer &x, int64_t incx, double beta, - sycl::buffer &y, int64_t incy) { - queue.submit([&](sycl::handler &cgh) { +void symv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& a, + int64_t lda, sycl::buffer& x, int64_t incx, double beta, + sycl::buffer& y, int64_t incy) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); @@ -623,9 +623,9 @@ void symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void syr(sycl::queue &queue, uplo 
upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -636,9 +636,9 @@ void syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buf }); } -void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { +void syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& a, int64_t lda) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_a = a.get_access(cgh); host_task(cgh, [=]() { @@ -649,10 +649,10 @@ void syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::bu }); } -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -665,10 +665,10 @@ void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, sycl::bu }); } -void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, 
int64_t incy, sycl::buffer& a, int64_t lda) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_x = x.get_access(cgh); auto accessor_y = y.get_access(cgh); auto accessor_a = a.get_access(cgh); @@ -681,10 +681,10 @@ void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, sycl::b }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -696,10 +696,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -711,10 +711,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + 
queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -726,10 +726,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -741,10 +741,10 @@ void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -756,10 +756,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() 
{ @@ -771,10 +771,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -786,10 +786,10 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -801,9 +801,9 @@ void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -814,9 +814,9 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void 
tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -827,10 +827,10 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -841,10 +841,10 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -855,9 +855,9 @@ void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag 
unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -868,9 +868,9 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &ap, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& ap, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -881,10 +881,10 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -895,10 +895,10 @@ void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &ap, sycl::buffer, 1> &x, +void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& ap, sycl::buffer, 1>& x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_ap = ap.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -909,9 +909,9 @@ void 
tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -922,9 +922,9 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -935,10 +935,10 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -949,10 +949,10 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag 
unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -963,9 +963,9 @@ void trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -976,9 +976,9 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -989,10 +989,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, 
uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -1003,10 +1003,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, }); } -void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { - queue.submit([&](sycl::handler &cgh) { +void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_x = x.get_access(cgh); host_task(cgh, [=]() { @@ -1019,10 +1019,10 @@ void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, // USM APIs -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - float alpha, const float *a, int64_t lda, const float *x, int64_t incx, float beta, - float *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + float alpha, const float* a, int64_t lda, const float* x, int64_t incx, float beta, + float* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1036,11 +1036,11 @@ sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int6 return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - double alpha, const double *a, int64_t 
lda, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + double alpha, const double* a, int64_t lda, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1054,48 +1054,48 @@ sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int6 return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, a, (const int)lda, x, - (const int)incx, (const void *)&beta, y, (const int)incy); + (const int)kl, (const int)ku, (const void*)&alpha, a, (const int)lda, x, + (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, 
int64_t ku, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgbmv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const int)kl, (const int)ku, (const void *)&alpha, a, (const int)lda, x, - (const int)incx, (const void *)&beta, y, (const int)incy); + (const int)kl, (const int)ku, (const void*)&alpha, a, (const int)lda, x, + (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1109,10 +1109,10 @@ sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, floa return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - const 
double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1126,48 +1126,48 @@ sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, doub return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - 
std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgemv(MAJOR, convert_to_cblas_trans(trans), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const float *x, int64_t incx, - const float *y, int64_t incy, float *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, float alpha, const float* x, int64_t incx, + const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1180,10 +1180,10 @@ sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, float alpha, const flo return done; } -sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event ger(sycl::queue& queue, int64_t m, int64_t n, double alpha, const double* x, + int64_t incx, const double* 
y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1196,152 +1196,152 @@ sycl::event ger(sycl::queue &queue, int64_t m, int64_t n, double alpha, const do return done; } -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_cgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event gerc(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gerc(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { 
- ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_zgerc(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_cgeru(MAJOR, (const int)m, (const int)n, (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event geru(sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event geru(sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { - ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const void *)&alpha, x, + ::cblas_zgeru(MAJOR, (const int)m, (const int)n, (const 
void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chbmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhbmv(MAJOR, 
convert_to_cblas_uplo(upper_lower), (const int)n, (const int)k, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, const std::complex *x, - int64_t incx, std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* a, int64_t lda, const std::complex* x, + int64_t incx, std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhemv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, a, (const int)lda, x, (const int)incx, - (const void *)&beta, y, (const int)incy); + (const void*)&alpha, a, (const int)lda, x, (const int)incx, + (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1354,10 +1354,10 @@ sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, return done; } -sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1370,82 +1370,82 @@ sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex 
*y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, a, + (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zher2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, a, + (const void*)&alpha, x, (const int)incx, y, (const int)incy, a, (const int)lda); }); }); return done; } -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *ap, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector 
&dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* ap, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, ap, x, (const int)incx, (const void *)&beta, y, + (const void*)&alpha, ap, x, (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *ap, const std::complex *x, int64_t incx, - std::complex beta, std::complex *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* ap, const std::complex* x, int64_t incx, + std::complex beta, std::complex* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhpmv(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, ap, x, (const int)incx, (const void *)&beta, y, + (const void*)&alpha, ap, x, (const int)incx, (const void*)&beta, y, (const int)incy); }); }); return done; } -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, - const std::complex *x, int64_t incx, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr(sycl::queue& 
queue, uplo upper_lower, int64_t n, float alpha, + const std::complex* x, int64_t incx, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1458,10 +1458,10 @@ sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, return done; } -sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, - const std::complex *x, int64_t incx, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, + const std::complex* x, int64_t incx, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1474,44 +1474,44 @@ sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_chpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, ap); + (const void*)&alpha, x, (const int)incx, y, 
(const int)incy, ap); }); }); return done; } -sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zhpr2(MAJOR, convert_to_cblas_uplo(upper_lower), (const int)n, - (const void *)&alpha, x, (const int)incx, y, (const int)incy, ap); + (const void*)&alpha, x, (const int)incx, y, (const int)incy, ap); }); }); return done; } -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, const float *x, int64_t incx, float beta, float *y, - int64_t incy, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, float alpha, + const float* a, int64_t lda, const float* x, int64_t incx, float beta, float* y, + int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1525,10 +1525,10 @@ sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, flo return done; } -sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, const double *x, int64_t incx, double beta, - double *y, int64_t incy, const std::vector &dependencies) { - auto 
done = queue.submit([&](sycl::handler &cgh) { +sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, double alpha, + const double* a, int64_t lda, const double* x, int64_t incx, double beta, + double* y, int64_t incy, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1542,10 +1542,10 @@ sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, dou return done; } -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *ap, - const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* ap, + const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1559,10 +1559,10 @@ sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *ap, - const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* ap, + const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1576,9 +1576,9 @@ sycl::event 
spmv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *ap, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* ap, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1591,9 +1591,9 @@ sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, co return done; } -sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *ap, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* ap, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1606,10 +1606,10 @@ sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, c return done; } -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { 
cgh.depends_on(dependencies[i]); @@ -1622,10 +1622,10 @@ sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *ap, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* ap, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1638,10 +1638,10 @@ sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *a, - int64_t lda, const float *x, int64_t incx, float beta, float *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* a, + int64_t lda, const float* x, int64_t incx, float beta, float* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1655,10 +1655,10 @@ sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *a, - int64_t lda, const double *x, int64_t incx, double beta, double *y, int64_t incy, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, 
double alpha, const double* a, + int64_t lda, const double* x, int64_t incx, double beta, double* y, int64_t incy, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1672,9 +1672,9 @@ sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, float *a, int64_t lda, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, float* a, int64_t lda, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1687,10 +1687,10 @@ sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, co return done; } -sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1703,10 +1703,10 @@ sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, c return done; } -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, const float *x, - int64_t incx, const float *y, int64_t incy, float *a, int64_t lda, - const std::vector 
&dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, float alpha, const float* x, + int64_t incx, const float* y, int64_t incy, float* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1720,10 +1720,10 @@ sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, float alpha, c return done; } -sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, const double *x, - int64_t incx, const double *y, int64_t incy, double *a, int64_t lda, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, double alpha, const double* x, + int64_t incx, const double* y, int64_t incy, double* a, int64_t lda, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1737,10 +1737,10 @@ sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, double alpha, return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1754,10 +1754,10 @@ sycl::event 
tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1771,10 +1771,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1788,10 +1788,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, 
const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1805,10 +1805,10 @@ sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1822,10 +1822,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1839,10 +1839,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, 
- int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1856,10 +1856,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - int64_t k, const std::complex *a, int64_t lda, std::complex *x, - int64_t incx, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + int64_t k, const std::complex* a, int64_t lda, std::complex* x, + int64_t incx, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1873,10 +1873,10 @@ sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *ap, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* ap, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; 
i++) { cgh.depends_on(dependencies[i]); @@ -1889,10 +1889,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *ap, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* ap, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1905,10 +1905,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1921,10 +1921,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t 
incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1937,10 +1937,10 @@ sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *ap, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* ap, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1953,10 +1953,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *ap, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* ap, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1969,10 +1969,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { 
+sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1985,10 +1985,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *ap, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* ap, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2001,10 +2001,10 @@ sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2018,10 +2018,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo 
upper_lower, transpose transa, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2035,10 +2035,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2052,10 +2052,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *b, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose transa, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* b, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = 
dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2069,10 +2069,10 @@ sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose transa, diag un return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const float *a, int64_t lda, float *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const float* a, int64_t lda, float* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2086,10 +2086,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const double *a, int64_t lda, double *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const double* a, int64_t lda, double* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2103,10 +2103,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, 
transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -2120,10 +2120,10 @@ sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag uni return done; } -sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, - const std::complex *a, int64_t lda, std::complex *x, int64_t incx, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, + const std::complex* a, int64_t lda, std::complex* x, int64_t incx, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); diff --git a/src/blas/backends/netlib/netlib_level3.cxx b/src/blas/backends/netlib/netlib_level3.cxx index 8bb6a04ae..2579e66e1 100644 --- a/src/blas/backends/netlib/netlib_level3.cxx +++ b/src/blas/backends/netlib/netlib_level3.cxx @@ -19,10 +19,10 @@ // Buffer APIs -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -36,10 +36,10 @@ 
void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -53,46 +53,46 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_cgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, + (const int)m, (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void 
gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, + (const int)m, (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - sycl::half alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, sycl::half beta, - sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + sycl::half alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, sycl::half beta, + sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -101,9 +101,9 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float 
beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -112,9 +112,9 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - float alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, float beta, sycl::buffer &c, int64_t ldc) { +void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, + float alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -123,46 +123,46 @@ void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int #endif } -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_chemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - 
accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zhemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer, 1> &a, int64_t lda, float beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer, 1>& a, int64_t lda, float beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -174,10 +174,10 @@ void herk(sycl::queue &queue, uplo upper_lower, 
transpose trans, int64_t n, int6 }); } -void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer, 1> &a, int64_t lda, double beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer, 1>& a, int64_t lda, double beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -189,17 +189,17 @@ void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, float beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, float beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_cher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb, (const float)beta, accessor_c.GET_MULTI_PTR, (const int)ldc); @@ -207,17 +207,17 @@ void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, 
int64_t ldb, double beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, double beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb, (const double)beta, accessor_c.GET_MULTI_PTR, (const int)ldc); @@ -225,10 +225,10 @@ void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -242,10 +242,10 @@ void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int6 }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, double alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, 
int64_t m, int64_t n, double alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + double beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -259,46 +259,46 @@ void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int6 }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex 
beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, - accessor_b.GET_MULTI_PTR, (const int)ldb, (const void *)&beta, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + accessor_b.GET_MULTI_PTR, (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, - sycl::buffer &a, int64_t lda, float beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, float beta, sycl::buffer& c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -310,10 +310,10 @@ void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, - sycl::buffer &a, int64_t lda, double beta, sycl::buffer &c, +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, double alpha, + sycl::buffer& a, int64_t lda, double beta, sycl::buffer& c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { @@ -325,40 +325,40 @@ void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int6 }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - 
std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, - accessor_a.GET_MULTI_PTR, (const int)lda, (const void *)&beta, - accessor_c.GET_MULTI_PTR, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, + (const int)lda, (const void*)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldc); }); }); } -void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, - accessor_a.GET_MULTI_PTR, (const int)lda, (const void *)&beta, - accessor_c.GET_MULTI_PTR, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, + (const int)lda, (const void*)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldc); }); }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t 
k, float alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb, - float beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, float alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb, + float beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -371,10 +371,10 @@ void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, double beta, sycl::buffer &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); @@ -388,46 +388,46 @@ void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto 
accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_csyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, int64_t ldc) { - queue.submit([&](sycl::handler &cgh) { +void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, int64_t ldc) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); auto accessor_c = c.get_access(cgh); host_task(cgh, [=]() { ::cblas_zsyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, + (const int)n, (const int)k, (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, - (const int)ldb, (const void *)&beta, accessor_c.GET_MULTI_PTR, + (const int)ldb, (const void*)&beta, accessor_c.GET_MULTI_PTR, (const int)ldc); }); }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + 
sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -440,10 +440,10 @@ void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -456,43 +456,43 @@ void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ctrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trmm(sycl::queue &queue, side left_right, 
uplo upper_lower, transpose transa, diag unit_diag, +void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ztrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -505,10 +505,10 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, double alpha, sycl::buffer& a, int64_t lda, + 
sycl::buffer& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { @@ -521,34 +521,34 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, - int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, + int64_t m, int64_t n, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ctrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); } -void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, +void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &b, int64_t ldb) { - queue.submit([&](sycl::handler &cgh) { + sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& b, int64_t ldb) { + queue.submit([&](sycl::handler& cgh) { auto accessor_a = a.get_access(cgh); auto accessor_b = b.get_access(cgh); host_task(cgh, [=]() { ::cblas_ztrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const 
int)m, (const int)n, - (const void *)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, + (const void*)&alpha, accessor_a.GET_MULTI_PTR, (const int)lda, accessor_b.GET_MULTI_PTR, (const int)ldb); }); }); @@ -556,10 +556,10 @@ void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans // USM APIs -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, + float beta, float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -573,11 +573,11 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -591,50 +591,48 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose 
transb, int64_t return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, a, - (const int)lda, b, (const int)ldb, (const void *)&beta, c, - (const int)ldc); + (const int)m, (const int)n, (const int)k, (const void*)&alpha, a, + (const int)lda, b, (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for 
(int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zgemm(MAJOR, convert_to_cblas_trans(transa), convert_to_cblas_trans(transb), - (const int)m, (const int)n, (const int)k, (const void *)&alpha, a, - (const int)lda, b, (const int)ldb, (const void *)&beta, c, - (const int)ldc); + (const int)m, (const int)n, (const int)k, (const void*)&alpha, a, + (const int)lda, b, (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, sycl::half alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, sycl::half beta, sycl::half *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, sycl::half alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, sycl::half beta, sycl::half* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -643,10 +641,10 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const sycl::half *a, int64_t lda, const sycl::half *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const sycl::half* a, int64_t lda, const sycl::half* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -655,10 +653,10 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } 
-sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const bfloat16 *a, int64_t lda, const bfloat16 *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, float alpha, const bfloat16* a, int64_t lda, const bfloat16* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { #ifdef COLUMN_MAJOR throw unimplemented("blas", "gemm", "for column_major layout"); #endif @@ -667,12 +665,12 @@ sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t #endif } -sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -680,19 +678,19 @@ sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_chemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event 
hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -700,18 +698,18 @@ sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_zhemm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -725,11 +723,11 @@ sycl::event herk(sycl::queue &queue, uplo upper_lower, 
transpose trans, int64_t return done; } -sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -743,46 +741,46 @@ sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, float beta, std::complex *c, - int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, float beta, std::complex* c, + int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_cher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, (const int)ldb, (const float)beta, c, (const int)ldc); }); }); return done; } -sycl::event her2k(sycl::queue &queue, uplo 
upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, double beta, std::complex *c, - int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, double beta, std::complex* c, + int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zher2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, (const int)ldb, (const double)beta, c, (const int)ldc); }); }); return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -797,11 +795,11 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - double alpha, 
const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -816,12 +814,12 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t return done; } -sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -829,19 +827,19 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_csymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event 
symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -849,17 +847,17 @@ sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t host_task(cgh, [=]() { ::cblas_zsymm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb, - (const void *)&beta, c, (const int)ldc); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -873,10 +871,10 @@ sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return 
done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -890,46 +888,46 @@ sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_csyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, - (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, 
- std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zsyrk(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, - (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, + (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, float beta, - float *c, int64_t ldc, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, float beta, + float* c, int64_t ldc, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -943,11 +941,11 @@ sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, const double *b, int64_t ldb, - double beta, double *c, int64_t ldc, - const std::vector &dependencies) { - auto done 
= queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + double alpha, const double* a, int64_t lda, const double* b, int64_t ldb, + double beta, double* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -961,48 +959,48 @@ sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_csyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, - (const int)ldb, (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, + (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, 
int64_t ldc, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } host_task(cgh, [=]() { ::cblas_zsyr2k(MAJOR, convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - (const int)n, (const int)k, (const void *)&alpha, a, (const int)lda, b, - (const int)ldb, (const void *)&beta, c, (const int)ldc); + (const int)n, (const int)k, (const void*)&alpha, a, (const int)lda, b, + (const int)ldb, (const void*)&beta, c, (const int)ldc); }); }); return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1017,10 +1015,10 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies) 
{ - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1035,11 +1033,11 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1048,17 +1046,17 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ctrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t 
ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1067,16 +1065,16 @@ sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ztrmm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, float alpha, const float *a, int64_t lda, - float *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, + diag unit_diag, int64_t m, int64_t n, float alpha, const float* a, int64_t lda, + float* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1091,10 +1089,10 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, - diag unit_diag, int64_t m, int64_t n, double alpha, const double *a, int64_t lda, - double *b, int64_t ldb, const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, 
+ diag unit_diag, int64_t m, int64_t n, double alpha, const double* a, int64_t lda, + double* b, int64_t ldb, const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1109,11 +1107,11 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1122,17 +1120,17 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ctrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; } -sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose transa, +sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose transa, diag unit_diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - const std::vector &dependencies) { - auto done = queue.submit([&](sycl::handler &cgh) { + const std::complex* a, int64_t lda, std::complex* 
b, int64_t ldb, + const std::vector& dependencies) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); @@ -1141,7 +1139,7 @@ sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpos ::cblas_ztrsm(MAJOR, convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(transa), convert_to_cblas_diag(unit_diag), (const int)m, (const int)n, - (const void *)&alpha, a, (const int)lda, b, (const int)ldb); + (const void*)&alpha, a, (const int)lda, b, (const int)ldb); }); }); return done; diff --git a/src/blas/backends/portblas/portblas_batch.cxx b/src/blas/backends/portblas/portblas_batch.cxx index 28c7ee5dc..2fe63a127 100644 --- a/src/blas/backends/portblas/portblas_batch.cxx +++ b/src/blas/backends/portblas/portblas_batch.cxx @@ -19,999 +19,999 @@ // Buffer APIs -void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "syrk_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue 
&queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", ""); } -void gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "gemv_batch", 
""); } -void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, 
std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { throw unimplemented("blas", "dgmm_batch", ""); } -void axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer &y, +void axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "axpy_batch", ""); } -void 
axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "axpy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, std::int64_t incy, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { throw unimplemented("blas", "copy_batch", ""); } -void copy_batch(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void copy_batch(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) 
{ throw unimplemented("blas", "copy_batch", ""); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 
1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for complex"); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "gemm_batch", " for unsupported dtype"); } -void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", 
"trsm_batch", ""); } -void trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "trsm_batch", ""); } -void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +void omatcopy_batch(sycl::queue& queue, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", ""); } -void omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +void omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("blas", "omatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, 
std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +void imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", ""); } -void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, +void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer &c, +void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& b, + std::int64_t ldb, std::int64_t 
stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { CALL_PORTBLAS_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", ""); } -void omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { throw unimplemented("blas", "omatadd_batch", ""); } // USM APIs -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, std::int64_t *n, 
std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue 
&queue, oneapi::mkl::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event syrk_batch(sycl::queue &queue, oneapi::mkl::uplo upper_lower, +sycl::event syrk_batch(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "syrk_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", 
" for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, std::int64_t 
incx, std::int64_t stridex, std::complex beta, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float *beta, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float* beta, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double *beta, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double* beta, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const 
std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event gemv_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemv_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, const float *a, std::int64_t lda, std::int64_t stridea, - const float *x, std::int64_t incx, std::int64_t stridex, float *c, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, const float* a, std::int64_t lda, std::int64_t stridea, + const float* x, std::int64_t incx, std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const 
std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, const double *a, std::int64_t lda, std::int64_t stridea, - const double *x, std::int64_t incx, std::int64_t stridex, double *c, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, const double* a, std::int64_t lda, std::int64_t stridea, + const double* x, std::int64_t incx, std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, - std::int64_t n, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *c, std::int64_t ldc, +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, + std::int64_t n, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, 
std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, - std::int64_t *n, const float **a, std::int64_t *lda, const float **x, - std::int64_t *incx, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, + std::int64_t* n, const float** a, std::int64_t* lda, const float** x, + std::int64_t* incx, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, - std::int64_t *n, const double **a, std::int64_t *lda, const double **x, - std::int64_t *incx, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, + std::int64_t* n, const double** a, std::int64_t* lda, const double** x, + std::int64_t* incx, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, + std::int64_t* n, const 
std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event dgmm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, - std::int64_t *n, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, + std::int64_t* n, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "dgmm_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, - std::int64_t *incx, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, + std::int64_t* incx, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, - std::int64_t *incx, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, + std::int64_t* incx, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw 
unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, float alpha, const float *x, - std::int64_t incx, std::int64_t stridex, float *y, std::int64_t incy, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, float alpha, const float* x, + std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, double alpha, const double *x, - std::int64_t incx, std::int64_t stridex, double *y, std::int64_t incy, +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, double alpha, 
const double* x, + std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy_batch, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event axpy_batch(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "axpy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", 
"copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t *n, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t* n, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + 
std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event copy_batch(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, +sycl::event copy_batch(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "copy_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { 
+sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, 
std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue 
&queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose *transa, - oneapi::mkl::transpose *transb, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t 
group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose* transa, + oneapi::mkl::transpose* transb, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, + std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, + std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double 
beta, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm_strided_batched, queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, const std::complex *b, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, const std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::complex beta, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } 
-sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + std::int64_t k, sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, + std::int64_t k, float alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, 
float *c, std::int64_t ldc, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event gemm_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "gemm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo 
upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side left_right, +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - 
oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, float *alpha, - const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, float* alpha, + const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* 
upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event trsm_batch(sycl::queue &queue, oneapi::mkl::side *left_right, - oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - oneapi::mkl::diag *unit_diag, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(sycl::queue& queue, oneapi::mkl::side* left_right, + oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + oneapi::mkl::diag* unit_diag, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "trsm_batch", " for USM"); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue 
&queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy_batch, queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", " for USM"); } -sycl::event omatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, +sycl::event omatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, 
oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, double *ab, std::int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event imatcopy_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, std::complex *ab, +sycl::event imatcopy_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { throw 
unimplemented("blas", "imatcopy_batch", " for USM"); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - double beta, const double *b, std::int64_t ldb, std::int64_t stride_b, - double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + double beta, const double* b, std::int64_t ldb, std::int64_t stride_b, + double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd_batch, queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, 
oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", " for USM"); } -sycl::event omatadd_batch(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event omatadd_batch(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { throw unimplemented("blas", "omatadd_batch", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_gemm_bias.cxx b/src/blas/backends/portblas/portblas_gemm_bias.cxx index 30f638f3e..0b62ee674 100644 --- a/src/blas/backends/portblas/portblas_gemm_bias.cxx +++ b/src/blas/backends/portblas/portblas_gemm_bias.cxx @@ -19,72 +19,72 @@ // Buffer APIs -void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, 
oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, int8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, int8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } -void gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm_bias(sycl::queue& queue, oneapi::mkl::transpose 
transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + float alpha, sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", ""); } // USM APIs -sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int8_t ao, const std::int8_t *b, std::int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int8_t ao, const std::int8_t* b, std::int64_t 
ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, - std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a, + std::int64_t lda, std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, + std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } -sycl::event gemm_bias(sycl::queue &queue, oneapi::mkl::transpose transa, +sycl::event gemm_bias(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, - std::int64_t n, std::int64_t k, float alpha, const std::uint8_t *a, - std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, - std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies) { + std::int64_t n, std::int64_t k, float alpha, const std::uint8_t* a, + std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, + std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level1.cxx b/src/blas/backends/portblas/portblas_level1.cxx index 0a0af855c..6d1f39463 100644 --- 
a/src/blas/backends/portblas/portblas_level1.cxx +++ b/src/blas/backends/portblas/portblas_level1.cxx @@ -19,91 +19,91 @@ // Buffer APIs -void dotc(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { throw unimplemented("blas", "dotc", ""); } -void dotu(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { throw unimplemented("blas", "dotu", ""); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { CALL_PORTBLAS_FN(::blas::_iamax, queue, n, x, incx, result); } -void iamax(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamax(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamax", ""); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { CALL_PORTBLAS_FN(::blas::_iamin, queue, n, x, incx, result); } -void iamin(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void iamin(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "iamin", ""); } -void asum(sycl::queue &queue, 
std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "asum", ""); } -void asum(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void asum(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { // portBLAS asum implementation requires that result is initialized to zero // before performing the computation. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_asum, queue, n, x, incx, result); } -void axpy(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy); } -void axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "axpy", "for complex"); } -void axpby(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { throw unimplemented("blas", "axpby", ""); } -void axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, std::complex 
beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "axpby", ""); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_copy, queue, n, x, incx, y, incy); } -void copy(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void copy(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "copy", " for complex."); } -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { // portBLAS dot implementation requires that result is initialized to zero // before performing the computation. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); @@ -111,288 +111,288 @@ void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::in } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE -void dot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", " for unmatched return type"); } #endif -void sdsdot(sycl::queue &queue, std::int64_t n, real_t sb, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { // portBLAS sdsdot implementation requires that result is initialized to zero // before performing the computation. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer& result) { throw unimplemented("blas", "nrm2", " for complex"); } -void nrm2(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& result) { // portBLAS nrm2 implementation requires that result is initialized to zero // before performing the computation. - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto result_acc = result.template get_access(cgh); cgh.single_task([=]() { result_acc[0] = real_t(0); }); }); CALL_PORTBLAS_FN(::blas::_nrm2, queue, n, x, incx, result); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy, real_t c, +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy, real_t c, real_t s) { throw unimplemented("blas", "rot", " for complex"); } -void rot(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, real_t c, real_t s) { +void rot(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, real_t c, real_t s) { CALL_PORTBLAS_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s); } -void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + 
sycl::buffer& c, sycl::buffer& s) { CALL_PORTBLAS_FN(::blas::_rotg, queue, a, b, c, s); } -void rotg(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { throw unimplemented("blas", "rotg", " for complex"); } -void rotm(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, sycl::buffer ¶m) { +void rotm(sycl::queue& queue, std::int64_t n, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, sycl::buffer& param) { CALL_PORTBLAS_FN(::blas::_rotm, queue, n, x, incx, y, incy, param); } -void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, real_t y1, sycl::buffer ¶m) { +void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, real_t y1, sycl::buffer& param) { CALL_PORTBLAS_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param); } -void scal(sycl::queue &queue, std::int64_t n, real_t alpha, sycl::buffer &x, +void scal(sycl::queue& queue, std::int64_t n, real_t alpha, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_scal, queue, n, alpha, x, incx); } -void scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "scal", " for complex"); } -void scal(sycl::queue &queue, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(sycl::queue& queue, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "scal", " for complex"); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, 
sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_swap, queue, n, x, incx, y, incy); } -void swap(sycl::queue &queue, std::int64_t n, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, std::int64_t incy) { +void swap(sycl::queue& queue, std::int64_t n, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "swap", " for complex"); } // USM APIs -sycl::event dotc(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotc(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { throw unimplemented("blas", "dotc", " for USM"); } -sycl::event dotu(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, const std::complex *y, std::int64_t incy, - std::complex *result, const std::vector &dependencies) { +sycl::event dotu(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, const std::complex* y, std::int64_t incy, + std::complex* result, const std::vector& dependencies) { throw unimplemented("blas", "dotu", " for USM"); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_iamax, queue, n, x, incx, result, dependencies); } -sycl::event iamax(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t 
incx, std::int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamax", " for USM"); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - std::int64_t *result, const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + std::int64_t* result, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_iamin, queue, n, x, incx, result, dependencies); } -sycl::event iamin(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { throw unimplemented("blas", "iamin", " for USM"); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, real_t *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, real_t* result, const std::vector& dependencies) { throw unimplemented("blas", "asum", " for USM"); } -sycl::event asum(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - real_t *result, const std::vector &dependencies) { +sycl::event asum(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + real_t* result, const std::vector& dependencies) { // portBLAS asum implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_asum, queue, n, x, incx, result, new_dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, real_t alpha, const real_t* x, + std::int64_t incx, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_axpy, queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpy(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "axpy", " for USM"); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, const real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, real_t alpha, const real_t* x, + std::int64_t incx, const real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", " for USM"); } -sycl::event axpby(sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, 
std::int64_t incx, const std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", " for USM"); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_copy, queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "copy", " for USM"); } -sycl::event dot(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - const real_t *y, std::int64_t incy, real_t *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + const real_t* y, std::int64_t incy, real_t* result, + const std::vector& dependencies) { // portBLAS dot implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_dot, queue, n, x, incx, y, incy, result, new_dependencies); } #ifdef ENABLE_MIXED_PRECISION_WITH_DOUBLE -sycl::event dot(sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { throw unimplemented("blas", "dot", " for USM"); } #endif -sycl::event sdsdot(sycl::queue &queue, std::int64_t n, real_t sb, const real_t *x, - std::int64_t incx, const real_t *y, std::int64_t incy, real_t *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, std::int64_t n, real_t sb, const real_t* x, + std::int64_t incx, const real_t* y, std::int64_t incy, real_t* result, + const std::vector& dependencies) { // portBLAS sdsdot implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.emplace_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_sdsdot, queue, n, sb, x, incx, y, incy, result, new_dependencies); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const std::complex *x, - std::int64_t incx, real_t *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const std::complex* x, + std::int64_t incx, real_t* result, const std::vector& dependencies) { throw unimplemented("blas", "nrm2", " for USM"); } -sycl::event nrm2(sycl::queue &queue, std::int64_t n, const real_t *x, std::int64_t incx, - real_t *result, const std::vector &dependencies) { +sycl::event nrm2(sycl::queue& queue, std::int64_t n, const real_t* x, std::int64_t incx, + real_t* result, const std::vector& dependencies) { // portBLAS nrm2 implementation requires result to be initializes to zero // before starting the computation. 
auto init_res_val = queue.submit( - [&](sycl::handler &cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); + [&](sycl::handler& cgh) { cgh.single_task([=]() { result[0] = real_t(0); }); }); std::vector new_dependencies = dependencies; new_dependencies.push_back(init_res_val); CALL_PORTBLAS_USM_FN(::blas::_nrm2, queue, n, x, incx, result, new_dependencies); } -sycl::event rot(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, real_t c, real_t s, - const std::vector &dependencies) { +sycl::event rot(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, real_t c, real_t s, + const std::vector& dependencies) { throw unimplemented("blas", "rot", " for USM"); } -sycl::event rot(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, +sycl::event rot(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, std::int64_t incy, real_t c, real_t s, - const std::vector &dependencies) { + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rot, queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, real_t *a, real_t *b, real_t *c, real_t *s, - const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, real_t* a, real_t* b, real_t* c, real_t* s, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotg, queue, a, b, c, s, dependencies); } -sycl::event rotg(sycl::queue &queue, std::complex *a, std::complex *b, real_t *c, - std::complex *s, const std::vector &dependencies) { +sycl::event rotg(sycl::queue& queue, std::complex* a, std::complex* b, real_t* c, + std::complex* s, const std::vector& dependencies) { throw unimplemented("blas", "rotg", " for USM"); } -sycl::event rotm(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, real_t *param, const std::vector &dependencies) { +sycl::event 
rotm(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, real_t* param, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotm, queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(sycl::queue &queue, real_t *d1, real_t *d2, real_t *x1, real_t y1, real_t *param, - const std::vector &dependencies) { +sycl::event rotmg(sycl::queue& queue, real_t* d1, real_t* d2, real_t* x1, real_t y1, real_t* param, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_rotmg, queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, real_t alpha, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_scal, queue, n, alpha, x, incx, dependencies); } -sycl::event scal(sycl::queue &queue, std::int64_t n, std::complex alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, std::complex alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "scal", " for USM"); } -sycl::event scal(sycl::queue &queue, std::int64_t n, real_t alpha, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(sycl::queue& queue, std::int64_t n, real_t alpha, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "scal", " for USM"); } -sycl::event swap(sycl::queue &queue, std::int64_t n, real_t *x, std::int64_t incx, real_t *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, real_t* x, std::int64_t incx, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_swap, queue, n, x, incx, y, incy, 
dependencies); } -sycl::event swap(sycl::queue &queue, std::int64_t n, std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(sycl::queue& queue, std::int64_t n, std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "swap", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level2.cxx b/src/blas/backends/portblas/portblas_level2.cxx index b3d8b6766..a99077a51 100644 --- a/src/blas/backends/portblas/portblas_level2.cxx +++ b/src/blas/backends/portblas/portblas_level2.cxx @@ -19,452 +19,452 @@ // Buffer APIs -void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "gemv", " for complex"); } -void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer &a, - std::int64_t lda, 
sycl::buffer &x, std::int64_t incx, real_t beta, - sycl::buffer &y, std::int64_t incy) { +void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, real_t beta, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, +void gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "gbmv", " for complex"); } -void ger(sycl::queue &queue, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(sycl::queue& queue, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "gerc", ""); } -void geru(sycl::queue &queue, 
std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "geru", ""); } -void hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hbmv", ""); } -void hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hemv", ""); } -void her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "her", ""); } -void her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { throw unimplemented("blas", "her2", ""); } -void hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { throw unimplemented("blas", "hpmv", ""); } -void hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { throw unimplemented("blas", "hpr", ""); } -void hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { throw unimplemented("blas", "hpr2", ""); } -void sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void sbmv(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void symv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, real_t beta, sycl::buffer &y, std::int64_t incy) { +void symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, real_t beta, sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { CALL_PORTBLAS_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &a, sycl::buffer &x, std::int64_t incx, real_t beta, - sycl::buffer &y, std::int64_t incy) { +void spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, 
std::int64_t n, real_t alpha, + sycl::buffer& a, sycl::buffer& x, std::int64_t incx, real_t beta, + sycl::buffer& y, std::int64_t incy) { CALL_PORTBLAS_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { CALL_PORTBLAS_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a); } -void spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { CALL_PORTBLAS_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tbmv", ""); } -void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tbsv", ""); } -void tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { throw 
unimplemented("blas", "tpmv", ""); } -void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "tpsv", ""); } -void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "trmv", " for complex"); } -void 
trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { CALL_PORTBLAS_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { throw unimplemented("blas", "trsv", ""); } // USM APIs -sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, const real_t *x, - std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, + std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemv, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(sycl::queue& 
queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "gemv", " for USM"); } -sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *x, std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* x, std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gbmv, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, +sycl::event gbmv(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { throw unimplemented("blas", "gbmv", " for USM"); } -sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, real_t alpha, const real_t *x, - std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(sycl::queue& queue, std::int64_t m, std::int64_t 
n, real_t alpha, const real_t* x, + std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, std::int64_t lda, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_ger, queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event gerc(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "gerc", " for USM"); } -sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event geru(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, + const std::vector& dependencies) { throw unimplemented("blas", "geru", " for USM"); } -sycl::event hbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hbmv", " 
for USM"); } -sycl::event hemv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hemv", " for USM"); } -sycl::event her(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her", " for USM"); } -sycl::event her2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { throw unimplemented("blas", "her2", " for USM"); } -sycl::event hpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(sycl::queue& 
queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "hpmv", " for USM"); } -sycl::event hpr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const std::complex *x, std::int64_t incx, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const std::complex* x, std::int64_t incx, std::complex* a, + const std::vector& dependencies) { throw unimplemented("blas", "hpr", " for USM"); } -sycl::event hpr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { throw unimplemented("blas", "hpr2", " for USM"); } -sycl::event sbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - real_t alpha, const real_t *a, std::int64_t lda, const real_t *x, - std::int64_t incx, real_t beta, real_t *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + real_t alpha, const real_t* a, std::int64_t lda, const real_t* x, + std::int64_t incx, real_t beta, real_t* y, std::int64_t incy, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_sbmv, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(sycl::queue &queue, oneapi::mkl::uplo 
upper_lower, std::int64_t n, real_t alpha, - const real_t *a, std::int64_t lda, const real_t *x, std::int64_t incx, real_t beta, - real_t *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event symv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* a, std::int64_t lda, const real_t* x, std::int64_t incx, real_t beta, + real_t* y, std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_symv, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, real_t *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, real_t* a, std::int64_t lda, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_syr, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, + std::int64_t lda, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_syr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event spmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *a, const real_t *x, std::int64_t incx, real_t beta, real_t *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const 
real_t* a, const real_t* x, std::int64_t incx, real_t beta, real_t* y, + std::int64_t incy, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spmv, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, real_t *a, - const std::vector &dependencies) { +sycl::event spr(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, real_t* a, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spr, queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, - const real_t *x, std::int64_t incx, const real_t *y, std::int64_t incy, real_t *a, - const std::vector &dependencies) { +sycl::event spr2(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, real_t alpha, + const real_t* x, std::int64_t incx, const real_t* y, std::int64_t incy, real_t* a, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_spr2, queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t *a, - std::int64_t lda, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, + std::int64_t lda, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tbmv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, 
+sycl::event tbmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "tbmv", " for USM"); } -sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t *a, - std::int64_t lda, real_t *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, const real_t* a, + std::int64_t lda, real_t* x, std::int64_t incx, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tbsv, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +sycl::event tbsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - const std::complex *a, std::int64_t lda, std::complex *x, - std::int64_t incx, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* x, + std::int64_t incx, const std::vector& dependencies) { throw unimplemented("blas", "tbsv", " for USM"); } -sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const real_t *a, real_t *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, 
std::int64_t n, const real_t* a, real_t* x, + std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tpmv, queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpmv", " for USM"); } -sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const real_t *a, real_t *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, real_t* x, + std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_tpsv, queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "tpsv", " for USM"); } -sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, 
const real_t *a, std::int64_t lda, - real_t *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, + real_t* x, std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trmv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "trmv", " for USM"); } -sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const real_t *a, std::int64_t lda, - real_t *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const real_t* a, std::int64_t lda, + real_t* x, std::int64_t incx, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trsv, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose 
trans, + oneapi::mkl::diag unit_diag, std::int64_t n, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { throw unimplemented("blas", "trsv", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level3.cxx b/src/blas/backends/portblas/portblas_level3.cxx index 4eeb1e8f1..57c6f25b1 100644 --- a/src/blas/backends/portblas/portblas_level3.cxx +++ b/src/blas/backends/portblas/portblas_level3.cxx @@ -19,19 +19,19 @@ // Buffer APIs -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + sycl::buffer& c, std::int64_t ldc) { CALL_PORTBLAS_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { using sycl_complex_real_t = sycl::ext::oneapi::experimental::complex; if (transa == oneapi::mkl::transpose::conjtrans || transb == oneapi::mkl::transpose::conjtrans) { @@ -63,184 +63,184 @@ void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transp 
queue.copy(out_pb_acc, out_acc); } -void symm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + sycl::buffer& c, std::int64_t ldc) { CALL_PORTBLAS_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "symm", ""); } -void hemm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "hemm", ""); } -void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, real_t beta, 
sycl::buffer &c, std::int64_t ldc) { +void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, real_t beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "syrk", ""); } -void syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "syrk", ""); } -void herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer, 1> &a, - std::int64_t lda, real_t beta, sycl::buffer, 1> &c, +void herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer, 1>& a, + std::int64_t lda, real_t beta, sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "herk", ""); } -void syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, real_t beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, real_t beta, + sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "syr2k", ""); } -void syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void syr2k(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "syr2k", ""); } -void her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +void her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, real_t beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, real_t beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "her2k", ""); } -void trmm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { throw unimplemented("blas", "trmm", ""); } -void trmm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "trmm", 
""); } -void trsm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + real_t alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { CALL_PORTBLAS_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +void trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "trsm", " for complex"); } -void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, +void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, real_t beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, real_t beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemmt", ""); } -void gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, +void gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, 
std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "gemmt", ""); } -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { CALL_PORTBLAS_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { throw unimplemented("blas", "omatcopy", ""); } -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &b, std::int64_t ldb, std::int64_t strideb) { +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { CALL_PORTBLAS_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, 
sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, - sycl::buffer &ab, std::int64_t lda, std::int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, real_t alpha, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { throw unimplemented("blas", "imatcopy", ""); } -void imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { throw unimplemented("blas", "imatcopy", ""); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - real_t alpha, sycl::buffer &a, std::int64_t lda, real_t beta, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &c, +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + real_t alpha, sycl::buffer& a, std::int64_t lda, real_t beta, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& c, std::int64_t ldc) { CALL_PORTBLAS_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { +void omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { throw unimplemented("blas", "omatadd", ""); } // USM APIs -sycl::event gemm(sycl::queue 
&queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, - std::int64_t lda, const real_t *b, std::int64_t ldb, real_t beta, real_t *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, + std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, real_t* c, + std::int64_t ldc, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_gemm, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { if (transa == oneapi::mkl::transpose::conjtrans || transb == oneapi::mkl::transpose::conjtrans) { throw unimplemented("blas", "gemm", "Conjugate Transpose unsupported yet on portBLAS"); @@ -249,203 +249,203 @@ sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl: c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *b, std::int64_t ldb, real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(sycl::queue& queue, 
oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* b, std::int64_t ldb, real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_symm, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event symm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "symm", " for USM"); } -sycl::event hemm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event hemm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "hemm", " for USM"); } -sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, std::int64_t lda, - real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, + real_t beta, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syrk", " for USM"); } -sycl::event syrk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +sycl::event syrk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syrk", " for USM"); } -sycl::event herk(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const std::complex *a, - std::int64_t lda, real_t beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, const std::complex* a, + std::int64_t lda, real_t beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "herk", " for USM"); } -sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, real_t alpha, const real_t *a, std::int64_t lda, - const real_t *b, std::int64_t ldb, real_t beta, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, real_t alpha, const real_t* a, std::int64_t lda, + const real_t* b, std::int64_t ldb, real_t beta, real_t* c, 
std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "syr2k", " for USM"); } -sycl::event syr2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +sycl::event syr2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "syr2k", " for USM"); } -sycl::event her2k(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, +sycl::event her2k(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, real_t beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, real_t beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "her2k", " for USM"); } -sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, + std::int64_t ldb, const std::vector& dependencies) { throw unimplemented("blas", "trmm", " for USM"); 
} -sycl::event trmm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event trmm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trmm", " for USM"); } -sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t *b, - std::int64_t ldb, const std::vector &dependencies) { + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t* b, + std::int64_t ldb, const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_trsm, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, +sycl::event trsm(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "trsm", " for USM"); } -sycl::event gemmt(sycl::queue &queue, oneapi::mkl::uplo 
upper_lower, oneapi::mkl::transpose transa, +sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, real_t alpha, - const real_t *a, std::int64_t lda, const real_t *b, std::int64_t ldb, real_t beta, - real_t *c, std::int64_t ldc, const std::vector &dependencies) { + const real_t* a, std::int64_t lda, const real_t* b, std::int64_t ldb, real_t beta, + real_t* c, std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "gemmt", " for USM"); } -sycl::event gemmt(sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, +sycl::event gemmt(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", " for USM"); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, real_t *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, real_t* b, std::int64_t ldb, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - 
const std::vector &dependencies) { +sycl::event omatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy", "for USM"); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, const real_t *a, std::int64_t lda, std::int64_t stridea, - real_t *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, const real_t* a, std::int64_t lda, std::int64_t stridea, + real_t* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatcopy2, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", "for USM"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - real_t alpha, real_t *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + real_t alpha, real_t* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", ""); } -sycl::event 
imatcopy(sycl::queue &queue, transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", ""); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, real_t alpha, const real_t *a, std::int64_t lda, real_t beta, - const real_t *b, std::int64_t ldb, real_t *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, real_t alpha, const real_t* a, std::int64_t lda, real_t beta, + const real_t* b, std::int64_t ldb, real_t* c, std::int64_t ldc, + const std::vector& dependencies) { CALL_PORTBLAS_USM_FN(::blas::_omatadd, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, - std::int64_t ldb, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, + std::int64_t ldb, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "omatadd", ""); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - real_t *alpha, const real_t **a, int64_t *lda, real_t **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const 
std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + real_t* alpha, const real_t** a, int64_t* lda, real_t** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", ""); } -sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, const std::complex **a, - int64_t *lda, std::complex **b, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, const std::complex** a, + int64_t* lda, std::complex** b, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "omatcopy_batch", ""); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - real_t *alpha, real_t **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + real_t* alpha, real_t** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", ""); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* groupsize, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", ""); } diff --git 
a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp index 1684b1b3e..cb5bac88f 100644 --- a/src/blas/backends/portblas/portblas_level3_bfloat16.cpp +++ b/src/blas/backends/portblas/portblas_level3_bfloat16.cpp @@ -33,20 +33,20 @@ namespace portblas { namespace column_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for bfloat16"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const oneapi::mkl::bfloat16 *a, std::int64_t lda, const oneapi::mkl::bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + const oneapi::mkl::bfloat16* a, std::int64_t lda, const oneapi::mkl::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } @@ -54,20 +54,20 @@ sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl: namespace row_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t 
lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for bfloat16"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const oneapi::mkl::bfloat16 *a, std::int64_t lda, const oneapi::mkl::bfloat16 *b, - std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { + const oneapi::mkl::bfloat16* a, std::int64_t lda, const oneapi::mkl::bfloat16* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } diff --git a/src/blas/backends/portblas/portblas_level3_half.cpp b/src/blas/backends/portblas/portblas_level3_half.cpp index 0e42528fa..136178998 100644 --- a/src/blas/backends/portblas/portblas_level3_half.cpp +++ b/src/blas/backends/portblas/portblas_level3_half.cpp @@ -33,66 +33,66 @@ namespace portblas { namespace column_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " half"); } -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, 
oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for different argument data types"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } } // namespace column_major namespace row_major { // BUFFER -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, 
oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " half"); } -void gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +void gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { throw unimplemented("blas", "gemm", " for different argument data types"); } // USM -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, +sycl::event gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } -sycl::event gemm(sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event 
gemm(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { throw unimplemented("blas", "gemm", " for USM"); } diff --git a/src/blas/backends/rocblas/rocblas_batch.cpp b/src/blas/backends/rocblas/rocblas_batch.cpp index 225616427..ef614c02b 100644 --- a/src/blas/backends/rocblas/rocblas_batch.cpp +++ b/src/blas/backends/rocblas/rocblas_batch.cpp @@ -28,7 +28,7 @@ // Helper Functions template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf, const int64_t len, const int64_t inc, const int64_t stride, const int64_t batch_size) { const auto abs_inc = std::abs(inc); const auto abs_stride = std::abs(stride); @@ -40,7 +40,7 @@ static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const i }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, const int64_t inc, +static inline void conj_vector(sycl::handler& cgh, T* ptr, const int64_t len, const int64_t inc, const int64_t stride, const int64_t batch_size) { const auto abs_inc = std::abs(inc); const auto abs_stride = std::abs(stride); @@ -52,7 +52,7 @@ static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, co } template -static inline void conj_vector(sycl::handler &cgh, T **ptr, const int64_t len, const int64_t inc, +static inline void conj_vector(sycl::handler& cgh, T** ptr, const int64_t len, const int64_t inc, const int64_t stride, const int64_t group_size) { const auto abs_inc = std::abs(inc); cgh.parallel_for(sycl::range{ (std::size_t)group_size, (std::size_t)len }, @@ -72,30 +72,30 @@ namespace column_major { // Buffer APIs template -inline void copy_batch(Func func, sycl::queue &queue, 
int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void copy_batch(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, stridex, y_, incy, stridey, - batch_size); + batch_size); }); }); } #define COPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, \ + void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, \ int64_t batch_size) { \ copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, batch_size); \ } @@ -108,30 +108,30 @@ COPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zcopy_strided_batched) #undef COPY_STRIDED_BATCH_LAUNCHER template -inline void axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { using rocDataType 
= typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, stridex, - y_, incy, stridey, batch_size); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, stridex, y_, + incy, stridey, batch_size); }); }); } #define AXPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, \ + void axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, \ int64_t stridey, int64_t batch_size) { \ axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size); \ @@ -145,36 +145,36 @@ AXPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zaxpy_strided_batched) #undef AXPY_BATCH_LAUNCHER template -inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, T beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t incx, int64_t stridex, T beta, + sycl::buffer& y, int64_t incy, 
int64_t stridey, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, incx, incy, stridea, stridex, stridey, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, stridea, x_, incx, stridex, - (rocDataType *)&beta, y_, incy, stridey, batch_size); + (rocDataType*)&alpha, a_, lda, stridea, x_, incx, stridex, + (rocDataType*)&beta, y_, incy, stridey, batch_size); }); }); } #define GEMV_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, TYPE beta, \ - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { \ + void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, TYPE beta, \ + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { \ gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, \ beta, y, incy, stridey, batch_size); \ } @@ -187,35 +187,35 @@ GEMV_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgemv_strided_batched) #undef GEMV_STRIDED_BATCH_LAUNCHER template -inline void 
dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, +inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldc, incx, stridea, stridex, stridec, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; - rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, - lda, stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); + rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, lda, + stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); }); }); } #define DGMM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, 
int64_t stridex, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, \ ldc, stridec, batch_size); \ } @@ -228,10 +228,10 @@ DGMM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zdgmm_strided_batched) #undef DGMM_STRIDED_BATCH_LAUNCHER template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, + Ts beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocTypeA = typename RocEquivalentType::Type; using rocTypeB = typename RocEquivalentType::Type; @@ -241,35 +241,35 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(rocblas_gemm_strided_batched_ex, err, handle, - get_rocblas_operation(transa), get_rocblas_operation(transb), m, - n, k, &alpha, a_, 
get_rocblas_datatype(), lda, - stridea, b_, get_rocblas_datatype(), ldb, strideb, - &beta, c_, get_rocblas_datatype(), ldc, stridec, c_, - get_rocblas_datatype(), ldc, stridec, batch_size, - get_rocblas_datatype(), rocblas_gemm_algo_standard, - solution_index, flags); + get_rocblas_operation(transa), get_rocblas_operation(transb), m, n, + k, &alpha, a_, get_rocblas_datatype(), lda, stridea, b_, + get_rocblas_datatype(), ldb, strideb, &beta, c_, + get_rocblas_datatype(), ldc, stridec, c_, + get_rocblas_datatype(), ldc, stridec, batch_size, + get_rocblas_datatype(), rocblas_gemm_algo_standard, + solution_index, flags); }); }); } #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, \ beta, c, ldc, stridec, batch_size); \ @@ -287,10 +287,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t 
m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -304,35 +304,34 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER template -inline void trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, stridea, b_, ldb, strideb, - batch_size); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, stridea, b_, ldb, strideb, batch_size); }); }); } #define 
TRSM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, int64_t stridea, sycl::buffer &b, int64_t ldb, \ + void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, \ int64_t strideb, int64_t batch_size) { \ trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, \ a, lda, stridea, b, ldb, strideb, batch_size); \ @@ -346,34 +345,33 @@ TRSM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_ztrsm_strided_batched) #undef TRSM_STRIDED_BATCH_LAUNCHER template -inline void syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - T beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + T beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc, stridea, stridec, batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - 
get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, c_, ldc, stridec, - batch_size); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, c_, ldc, stridec, batch_size); }); }); } #define SYRK_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, TYPE beta, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, TYPE beta, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, stridea, beta, \ c, ldc, stridec, batch_size); \ } @@ -386,9 +384,9 @@ SYRK_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zsyrk_strided_batched) #undef SYRK_STRIDED_BATCH_LAUNCHER template -inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &b, int64_t ldb, int64_t strideb, +inline void omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); @@ -397,27 +395,27 @@ inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64 const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? 
n : m; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), - get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, stridea, (rocDataType *)&beta, - nullptr, lda, stridea, b_, ldb, strideb, batch_size); + get_rocblas_operation(trans), new_m, new_n, (rocDataType*)&alpha, + a_, lda, stridea, (rocDataType*)&beta, nullptr, lda, stridea, b_, + ldb, strideb, batch_size); }); }); } #define OMATCOPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb, \ + void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb, \ int64_t batch_size) { \ omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size); \ @@ -430,63 +428,63 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batc #undef OMATCOPY_STRIDED_BATCH_LAUNCHER -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw 
unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } template -inline void omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, const T beta, sycl::buffer &b, int64_t ldb, - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, const T beta, sycl::buffer& b, int64_t ldb, + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc, stridea, strideb, stridec, 
batch_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, b_, ldb, strideb, c_, ldc, - stridec, batch_size); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, b_, ldb, strideb, c_, ldc, stridec, + batch_size); }); }); } #define OMATADD_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, const TYPE beta, sycl::buffer &b, int64_t ldb, \ - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, const TYPE beta, sycl::buffer& b, int64_t ldb, \ + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, beta, \ b, ldb, strideb, c, ldc, stridec, batch_size); \ @@ -502,26 +500,26 @@ OMATADD_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batch // USM APIs template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t 
*n, const T **x, int64_t *incx, - T **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t* n, const T** x, int64_t* incx, + T** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); rocblas_native_func(func, err, handle, (int)n[i], x_ + offset, (int)incx[i], - y_ + offset, (int)incy[i], (int)group_size[i]); + y_ + offset, (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -531,9 +529,9 @@ inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t *n, const T } #define COPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t *n, const TYPE **x, int64_t *incx, \ - TYPE **y, int64_t *incy, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t* n, const TYPE** x, int64_t* incx, \ + TYPE** y, int64_t* incy, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, group_count, group_size, \ dependencies); \ } @@ -546,22 +544,22 @@ COPY_BATCH_LAUNCHER_USM(std::complex, 
rocblas_zcopy_batched) #undef COPY_BATCH_LAUNCHER_USM template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, - int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, + int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, stridex, y_, incy, stridey, - batch_size); + batch_size); }); }); @@ -569,9 +567,9 @@ inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T } #define COPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -584,27 +582,27 @@ COPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_strided_batc #undef 
COPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alpha, const T **x, - int64_t *incx, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t* n, T* alpha, const T** x, + int64_t* incx, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); - rocblas_native_func(func, err, handle, (int)n[i], (rocDataType *)&alpha[i], - x_ + offset, (int)incx[i], y_ + offset, (int)incy[i], - (int)group_size[i]); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); + rocblas_native_func(func, err, handle, (int)n[i], (rocDataType*)&alpha[i], + x_ + offset, (int)incx[i], y_ + offset, (int)incy[i], + (int)group_size[i]); offset += group_size[i]; } }); @@ -614,9 +612,9 @@ inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alph } #define AXPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t *n, TYPE *alpha, const TYPE **x, \ - int64_t *incx, TYPE **y, int64_t *incy, int64_t group_count, \ - int64_t *group_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t* n, TYPE* alpha, const TYPE** x, \ + int64_t* incx, 
TYPE** y, int64_t* incy, int64_t group_count, \ + int64_t* group_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, group_count, \ group_size, dependencies); \ } @@ -629,22 +627,22 @@ AXPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_batched) #undef AXPY_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, - int64_t incx, int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, + int64_t incx, int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, stridex, - y_, incy, stridey, batch_size); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, stridex, y_, + incy, stridey, batch_size); }); }); @@ -652,9 +650,9 @@ inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, } #define AXPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector 
&dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -667,26 +665,26 @@ AXPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_strided_batc #undef AXPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, int64_t stridea, const T *x, - int64_t incx, int64_t stridex, T beta, T *y, int64_t incy, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, int64_t stridea, const T* x, + int64_t incx, int64_t stridex, T beta, T* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy, stridea, stridex, stridey, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, stridea, x_, incx, stridex, - (rocDataType *)&beta, y_, incy, stridey, batch_size); + (rocDataType*)&alpha, a_, lda, stridea, x_, incx, stridex, 
+ (rocDataType*)&beta, y_, incy, stridey, batch_size); }); }); @@ -694,11 +692,11 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in } #define GEMV_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE beta, TYPE *y, int64_t incy, \ + sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE beta, TYPE* y, int64_t incy, \ int64_t stridey, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, \ stridex, beta, y, incy, stridey, batch_size, dependencies); \ } @@ -711,30 +709,30 @@ GEMV_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_strided_batc #undef GEMV_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, const T** x, + int64_t* incx, T* beta, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], incx[i], incy[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - 
onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **y_ = reinterpret_cast(y); - rocblas_native_func( - func, err, handle, get_rocblas_operation(trans[i]), (int)m[i], (int)n[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], x_ + offset, (int)incx[i], - (rocDataType *)&beta[i], y_ + offset, (int)incy[i], (int)group_size[i]); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** y_ = reinterpret_cast(y); + rocblas_native_func(func, err, handle, get_rocblas_operation(trans[i]), (int)m[i], + (int)n[i], (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + x_ + offset, (int)incx[i], (rocDataType*)&beta[i], y_ + offset, + (int)incy[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -745,9 +743,9 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i #define GEMV_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ sycl::event gemv_batch( \ - sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, TYPE *alpha, const TYPE **a, \ - int64_t *lda, const TYPE **x, int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, const std::vector &dependencies) { \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy, group_count, group_size, dependencies); \ } @@ -760,24 +758,24 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_batched) #undef GEMV_BATCH_LAUNCHER_USM template 
-inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - const T *a, int64_t lda, int64_t stridea, const T *x, int64_t incx, - int64_t stridex, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + const T* a, int64_t lda, int64_t stridea, const T* x, int64_t incx, + int64_t stridex, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, incx, stridea, stridex, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto c_ = reinterpret_cast(c); rocblas_status err; - rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, - lda, stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); + rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), m, n, a_, lda, + stridea, x_, incx, stridex, c_, ldc, stridec, batch_size); }); }); @@ -785,10 +783,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, in } #define DGMM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + 
sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, \ stridex, c, ldc, stridec, batch_size, dependencies); \ } @@ -801,29 +799,29 @@ DGMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_strided_batc #undef DGMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, int64_t *m, - int64_t *n, const T **a, int64_t *lda, const T **x, int64_t *incx, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, int64_t* m, + int64_t* n, const T** a, int64_t* lda, const T** x, int64_t* incx, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldc[i], incx[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **x_ = reinterpret_cast(x); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** x_ = reinterpret_cast(x); + auto** c_ = reinterpret_cast(c); rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right[i]), - (int)m[i], (int)n[i], a_ + offset, 
(int)lda[i], x_ + offset, - (int)incx[i], c_ + offset, (int)ldc[i], (int)group_size[i]); + (int)m[i], (int)n[i], a_ + offset, (int)lda[i], x_ + offset, + (int)incx[i], c_ + offset, (int)ldc[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -833,10 +831,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, i } #define DGMM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, \ - const TYPE **a, int64_t *lda, const TYPE **x, int64_t *incx, TYPE **c, \ - int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, \ + const TYPE** a, int64_t* lda, const TYPE** x, int64_t* incx, TYPE** c, \ + int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, x, incx, c, ldc, \ group_count, group_size, dependencies); \ } @@ -849,13 +847,13 @@ DGMM_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_batched) #undef DGMM_BATCH_LAUNCHER template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stridea, - const Tb *b, int64_t ldb, int64_t strideb, Ts beta, - Tc *c, int64_t ldc, int64_t stridec, + Ts alpha, const Ta* a, int64_t lda, int64_t stridea, + const Tb* b, int64_t ldb, int64_t strideb, Ts beta, + Tc* c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocTypeA = typename RocEquivalentType::Type; using rocTypeB = typename RocEquivalentType::Type; using rocTypeC = typename RocEquivalentType::Type; @@ -864,23 +862,23 @@ inline sycl::event 
gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(rocblas_gemm_strided_batched_ex, err, handle, - get_rocblas_operation(transa), get_rocblas_operation(transb), m, - n, k, &alpha, a_, get_rocblas_datatype(), lda, - stridea, b_, get_rocblas_datatype(), ldb, strideb, - &beta, c_, get_rocblas_datatype(), ldc, stridec, c_, - get_rocblas_datatype(), ldc, stridec, batch_size, - get_rocblas_datatype(), rocblas_gemm_algo_standard, - solution_index, flags); + get_rocblas_operation(transa), get_rocblas_operation(transb), m, n, + k, &alpha, a_, get_rocblas_datatype(), lda, stridea, b_, + get_rocblas_datatype(), ldb, strideb, &beta, c_, + get_rocblas_datatype(), ldc, stridec, c_, + get_rocblas_datatype(), ldc, stridec, batch_size, + get_rocblas_datatype(), rocblas_gemm_algo_standard, + solution_index, flags); }); }); @@ -888,11 +886,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { 
\ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, \ b, ldb, strideb, beta, c, ldc, stridec, batch_size, \ dependencies); \ @@ -910,11 +908,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -927,11 +925,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t 
*group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocTypeA = typename RocEquivalentType::Type; using rocTypeB = typename RocEquivalentType::Type; using rocTypeC = typename RocEquivalentType::Type; @@ -942,17 +940,17 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr int32_t solution_index = 0; rocblas_gemm_flags flags = rocblas_gemm_flags_none; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); + auto** c_ = reinterpret_cast(c); rocblas_native_func( rocblas_gemm_batched_ex, err, handle, get_rocblas_operation(transa[i]), get_rocblas_operation(transb[i]), (int)m[i], (int)n[i], (int)k[i], &alpha[i], @@ -971,11 +969,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { 
\ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -992,11 +990,11 @@ GEMM_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -1009,27 +1007,26 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - const T *a, int64_t lda, int64_t stridea, T *b, int64_t ldb, + const T* a, int64_t lda, int64_t stridea, T* b, int64_t ldb, 
int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, stridea, b_, ldb, strideb, - batch_size); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, stridea, b_, ldb, strideb, batch_size); }); }); @@ -1037,10 +1034,10 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, up } #define TRSM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, int64_t strideb, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, int64_t strideb, \ + int64_t batch_size, const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, 
trans, unit_diag, m, n, \ alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); \ } @@ -1053,31 +1050,31 @@ TRSM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_strided_batc #undef TRSM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, T *alpha, - const T **a, int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, T* alpha, + const T** a, int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right[i]), - get_rocblas_fill_mode(upper_lower[i]), - get_rocblas_operation(trans[i]), - get_rocblas_diag_type(unit_diag[i]), (int)m[i], (int)n[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - b_ + offset, (int)ldb[i], (int)group_size[i]); + get_rocblas_fill_mode(upper_lower[i]), + get_rocblas_operation(trans[i]), + get_rocblas_diag_type(unit_diag[i]), (int)m[i], 
(int)n[i], + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], b_ + offset, + (int)ldb[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -1087,11 +1084,11 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, u } #define TRSM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, group_count, group_size, dependencies); \ } @@ -1104,30 +1101,30 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_batched) #undef TRSM_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, transpose *trans, - int64_t *n, int64_t *k, T *alpha, const T **a, int64_t *lda, T *beta, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower, transpose* trans, + int64_t* n, int64_t* k, T* alpha, const T** a, int64_t* lda, T* beta, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(n[i], k[i], lda[i], ldc[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **c_ = reinterpret_cast(c); + auto** a_ = reinterpret_cast(a); + auto** c_ = reinterpret_cast(c); rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower[i]), - get_rocblas_operation(trans[i]), (int)n[i], (int)k[i], - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - (rocDataType *)&beta[i], c_ + offset, (int)ldc[i], - (int)group_size[i]); + get_rocblas_operation(trans[i]), (int)n[i], (int)k[i], + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + (rocDataType*)&beta[i], c_ + offset, (int)ldc[i], + (int)group_size[i]); offset += group_size[i]; } }); @@ -1137,10 +1134,10 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, } #define SYRK_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, \ - int64_t *k, TYPE *alpha, const TYPE **a, int64_t *lda, TYPE *beta, \ - TYPE **c, int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, \ + int64_t* k, TYPE* alpha, const TYPE** a, int64_t* lda, TYPE* beta, \ + TYPE** c, int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, \ c, ldc, group_count, group_size, dependencies); \ } @@ -1153,25 +1150,24 @@ SYRK_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_batched) #undef SYRK_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, 
sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc, stridea, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, c_, ldc, stridec, - batch_size); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, c_, ldc, stridec, batch_size); }); }); @@ -1179,11 +1175,11 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, t } #define SYRK_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, \ - int64_t k, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, TYPE *c, int64_t ldc, \ + sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, \ + int64_t k, const TYPE alpha, const 
TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, TYPE* c, int64_t ldc, \ int64_t stridec, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ stridea, beta, c, ldc, stridec, batch_size, dependencies); \ } @@ -1196,11 +1192,11 @@ SYRK_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_strided_batc #undef SYRK_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, T *b, int64_t ldb, int64_t strideb, +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, stridea, strideb, batch_size); @@ -1208,18 +1204,18 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? 
n : m; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), - get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, stridea, (rocDataType *)&beta, - nullptr, lda, stridea, b_, ldb, strideb, batch_size); + get_rocblas_operation(trans), new_m, new_n, (rocDataType*)&alpha, + a_, lda, stridea, (rocDataType*)&beta, nullptr, lda, stridea, b_, + ldb, strideb, batch_size); }); }); @@ -1227,10 +1223,10 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans } #define OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, int64_t stridea, \ - TYPE *b, int64_t ldb, int64_t strideb, int64_t batch_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, int64_t stridea, \ + TYPE* b, int64_t ldb, int64_t strideb, int64_t batch_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size, dependencies); \ } @@ -1242,54 +1238,54 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_ #undef OMATCOPY_STRIDED_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t 
ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } template -inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t 
lda, - int64_t stridea, const T beta, const T *b, int64_t ldb, - int64_t strideb, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, const T* b, int64_t ldb, + int64_t strideb, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc, stridea, strideb, stridec, batch_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, stridea, (rocDataType *)&beta, b_, ldb, strideb, c_, ldc, - stridec, batch_size); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, lda, + stridea, (rocDataType*)&beta, b_, ldb, strideb, c_, ldc, stridec, + batch_size); }); }); @@ -1297,11 +1293,11 @@ inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa } #define OMATADD_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, const TYPE *b, int64_t ldb, \ - int64_t strideb, TYPE *c, 
int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, const TYPE* b, int64_t ldb, \ + int64_t strideb, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, \ beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); \ } @@ -1314,35 +1310,35 @@ OMATADD_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_b #undef OMATADD_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, T **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, T** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; for (int64_t i = 0; i < group_count; i++) { overflow_check(m[i], n[i], lda[i], ldb[i], group_size[i]); } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a); - auto **b_ = reinterpret_cast(b); + auto** a_ = reinterpret_cast(a); + auto** b_ = reinterpret_cast(b); const T beta = 0; const auto new_m = 
trans[i] == oneapi::mkl::transpose::nontrans ? m[i] : n[i]; const auto new_n = trans[i] == oneapi::mkl::transpose::nontrans ? n[i] : m[i]; rocblas_native_func(func, err, handle, get_rocblas_operation(trans[i]), - get_rocblas_operation(trans[i]), (int)new_m, (int)new_n, - (rocDataType *)&alpha[i], a_ + offset, (int)lda[i], - (rocDataType *)&beta, nullptr, (int)lda[i], b_ + offset, - (int)ldb[i], (int)group_size[i]); + get_rocblas_operation(trans[i]), (int)new_m, (int)new_n, + (rocDataType*)&alpha[i], a_ + offset, (int)lda[i], + (rocDataType*)&beta, nullptr, (int)lda[i], b_ + offset, + (int)ldb[i], (int)group_size[i]); offset += group_size[i]; } }); @@ -1352,10 +1348,10 @@ inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *tran } #define OMATCOPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, \ + TYPE* alpha, const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, \ group_count, group_size, dependencies); \ } @@ -1367,31 +1363,31 @@ OMATCOPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_batched) #undef OMATCOPY_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const 
std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for column_major layout"); } @@ -1402,15 +1398,15 @@ namespace row_major { // Buffer APIs template -inline void copy_batch(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void 
copy_batch(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { column_major::copy_batch(func, queue, n, x, incx, stridex, y, incy, stridey, batch_size); } #define COPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy_batch(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - int64_t stridex, sycl::buffer &y, int64_t incy, int64_t stridey, \ + void copy_batch(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, \ int64_t batch_size) { \ copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, batch_size); \ } @@ -1423,15 +1419,15 @@ COPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zcopy_strided_batched) #undef COPY_STRIDED_BATCH_LAUNCHER template -inline void axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, +inline void axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { column_major::axpy_batch(func, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } #define AXPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, int64_t stridex, sycl::buffer &y, int64_t incy, \ + void axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, int64_t stridex, sycl::buffer& y, int64_t incy, \ int64_t stridey, int64_t batch_size) { \ axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size); \ @@ -1445,10 +1441,10 @@ AXPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zaxpy_strided_batched) #undef AXPY_STRIDED_BATCH_LAUNCHER template -inline void gemv_batch(Func 
func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - int64_t stridea, sycl::buffer, 1> &x, int64_t incx, - int64_t stridex, std::complex beta, sycl::buffer, 1> &y, +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + int64_t stridea, sycl::buffer, 1>& x, int64_t incx, + int64_t stridex, std::complex beta, sycl::buffer, 1>& y, int64_t incy, int64_t stridey, int64_t batch_size) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1459,11 +1455,11 @@ inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m if (m > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx, stridex, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx, stridex, batch_size); }); if (n > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } @@ -1474,16 +1470,16 @@ inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } template -inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &x, int64_t incx, int64_t stridex, T beta, - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { +inline void gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& x, int64_t 
incx, int64_t stridex, T beta, + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1492,10 +1488,10 @@ inline void gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m } #define GEMV_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, TYPE beta, \ - sycl::buffer &y, int64_t incy, int64_t stridey, int64_t batch_size) { \ + void gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, TYPE beta, \ + sycl::buffer& y, int64_t incy, int64_t stridey, int64_t batch_size) { \ gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, \ beta, y, incy, stridey, batch_size); \ } @@ -1508,9 +1504,9 @@ GEMV_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgemv_strided_batched) #undef GEMV_STRIDED_BATCH_LAUNCHER template -inline void dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &x, - int64_t incx, int64_t stridex, sycl::buffer &c, int64_t ldc, +inline void dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& x, + int64_t incx, int64_t stridex, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { auto new_side = left_right == oneapi::mkl::side::left ? 
oneapi::mkl::side::right : oneapi::mkl::side::left; @@ -1520,10 +1516,10 @@ inline void dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m } #define DGMM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &x, int64_t incx, int64_t stridex, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& x, int64_t incx, int64_t stridex, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, \ ldc, stridec, batch_size); \ } @@ -1536,10 +1532,10 @@ DGMM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zdgmm_strided_batched) #undef DGMM_STRIDED_BATCH_LAUNCHER template -inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, Ts alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, - Ts beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void gemm_batch_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, Ts alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, + Ts beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { auto new_transa = transb; auto new_transb = transa; @@ -1550,10 +1546,10 @@ inline void gemm_batch_impl(sycl::queue &queue, transpose transa, transpose tran #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t 
lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ gemm_batch_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, \ beta, c, ldc, stridec, batch_size); \ @@ -1571,10 +1567,10 @@ GEMM_STRIDED_BATCH_LAUNCHER(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER #define GEMM_STRIDED_BATCH_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - void gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, sycl::buffer &b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, sycl::buffer& b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ @@ -1588,9 +1584,9 @@ GEMM_STRIDED_BATCH_LAUNCHER(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER template -inline void trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline void trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, int64_t stridea, sycl::buffer &b, + sycl::buffer& a, int64_t lda, int64_t stridea, sycl::buffer& 
b, int64_t ldb, int64_t strideb, int64_t batch_size) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; @@ -1602,9 +1598,9 @@ inline void trsm_batch(Func func, sycl::queue &queue, side left_right, uplo uppe } #define TRSM_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, int64_t stridea, sycl::buffer &b, int64_t ldb, \ + void trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, int64_t stridea, sycl::buffer& b, int64_t ldb, \ int64_t strideb, int64_t batch_size) { \ trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, \ a, lda, stridea, b, ldb, strideb, batch_size); \ @@ -1618,9 +1614,9 @@ TRSM_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_ztrsm_strided_batched) #undef TRSM_STRIDED_BATCH_LAUNCHER template -inline void syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - T beta, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + T beta, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -1632,9 +1628,9 @@ inline void syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpos } #define SYRK_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, TYPE beta, \ - sycl::buffer &c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ + void syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, TYPE beta, \ + sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { \ syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, stridea, beta, \ c, ldc, stridec, batch_size); \ } @@ -1647,18 +1643,18 @@ SYRK_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zsyrk_strided_batched) #undef SYRK_STRIDED_BATCH_LAUNCHER template -inline void omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, int64_t stridea, - sycl::buffer &b, int64_t ldb, int64_t strideb, +inline void omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, int64_t stridea, + sycl::buffer& b, int64_t ldb, int64_t strideb, int64_t batch_size) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size); } #define OMATCOPY_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb, \ + void omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t 
strideb, \ int64_t batch_size) { \ omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size); \ @@ -1671,45 +1667,45 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batc #undef OMATCOPY_STRIDED_BATCH_LAUNCHER -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb, int64_t stride, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -void imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } template -inline void omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T 
alpha, sycl::buffer &a, int64_t lda, - int64_t stridea, const T beta, sycl::buffer &b, int64_t ldb, - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, +inline void omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, sycl::buffer& a, int64_t lda, + int64_t stridea, const T beta, sycl::buffer& b, int64_t ldb, + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, int64_t batch_size) { return column_major::omatadd_batch(func, queue, transa, transb, n, m, alpha, a, lda, stridea, beta, b, ldb, strideb, c, ldc, stridec, batch_size); } #define OMATADD_STRIDED_BATCH_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, sycl::buffer &a, int64_t lda, \ - int64_t stridea, const TYPE beta, sycl::buffer &b, int64_t ldb, \ - int64_t strideb, sycl::buffer &c, int64_t ldc, int64_t stridec, \ + void omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, sycl::buffer& a, int64_t lda, \ + int64_t stridea, const TYPE beta, sycl::buffer& b, int64_t ldb, \ + int64_t strideb, sycl::buffer& c, int64_t ldc, int64_t stridec, \ int64_t batch_size) { \ omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, beta, \ b, ldb, strideb, c, ldc, stridec, batch_size); \ @@ -1725,17 +1721,17 @@ OMATADD_STRIDED_BATCH_LAUNCHER(std::complex, rocblas_zgeam_strided_batch // USM APIs template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t *n, const T **x, int64_t *incx, - T **y, int64_t *incy, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t* n, const T** x, int64_t* incx, + T** y, int64_t* incy, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return 
column_major::copy_batch(func, queue, n, x, incx, y, incy, group_count, group_size, dependencies); } #define COPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t *n, const TYPE **x, int64_t *incx, \ - TYPE **y, int64_t *incy, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t* n, const TYPE** x, int64_t* incx, \ + TYPE** y, int64_t* incy, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, group_count, group_size, \ dependencies); \ } @@ -1748,17 +1744,17 @@ COPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zcopy_batched) #undef COPY_BATCH_LAUNCHER_USM template -inline sycl::event copy_batch(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, - int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event copy_batch(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, + int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { return column_major::copy_batch(func, queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } #define COPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy_batch(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event copy_batch(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return copy_batch(ROCBLAS_ROUTINE, queue, n, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -1771,17 +1767,17 @@ COPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, 
rocblas_zcopy_strided_batc #undef COPY_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t *n, T *alpha, const T **x, - int64_t *incx, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t* n, T* alpha, const T** x, + int64_t* incx, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { return column_major::axpy_batch(func, queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } #define AXPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy_batch(sycl::queue &queue, int64_t *n, TYPE *alpha, const TYPE **x, \ - int64_t *incx, TYPE **y, int64_t *incy, int64_t group_count, \ - int64_t *group_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t* n, TYPE* alpha, const TYPE** x, \ + int64_t* incx, TYPE** y, int64_t* incy, int64_t group_count, \ + int64_t* group_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, group_count, \ group_size, dependencies); \ } @@ -1794,17 +1790,17 @@ AXPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_batched) #undef AXPY_BATCH_LAUNCHER_USM template -inline sycl::event axpy_batch(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, - int64_t incx, int64_t stridex, T *y, int64_t incy, int64_t stridey, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event axpy_batch(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, + int64_t incx, int64_t stridex, T* y, int64_t incy, int64_t stridey, + int64_t batch_size, const std::vector& dependencies) { return column_major::axpy_batch(func, queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } #define AXPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event 
axpy_batch(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - int64_t stridex, TYPE *y, int64_t incy, int64_t stridey, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event axpy_batch(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + int64_t stridex, TYPE* y, int64_t incy, int64_t stridey, \ + int64_t batch_size, const std::vector& dependencies) { \ return axpy_batch(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, stridex, y, incy, stridey, \ batch_size, dependencies); \ } @@ -1817,12 +1813,12 @@ AXPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zaxpy_strided_batc #undef AXPY_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - int64_t stridea, const std::complex *x, int64_t incx, - int64_t stridex, std::complex beta, std::complex *y, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + int64_t stridea, const std::complex* x, int64_t incx, + int64_t stridex, std::complex beta, std::complex* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { sycl::event done; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -1833,13 +1829,13 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in beta = std::conj(beta); if (m > 0) { - done = queue.submit([&](sycl::handler &cgh) { - conj_vector(cgh, (std::complex *)x, m, incx, stridex, batch_size); + done = queue.submit([&](sycl::handler& cgh) { + conj_vector(cgh, (std::complex*)x, m, incx, stridex, batch_size); }); if (n > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); + [&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy, stridey, batch_size); }); } } } @@ -1851,7 +1847,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy, stridey, batch_size); }); @@ -1862,11 +1858,11 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in } template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, int64_t stridea, const T *x, - int64_t incx, int64_t stridex, T beta, T *y, int64_t incy, +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, int64_t stridea, const T* x, + int64_t incx, int64_t stridex, T beta, T* y, int64_t incy, int64_t stridey, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1875,11 +1871,11 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose trans, in } #define GEMV_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE beta, TYPE *y, int64_t incy, \ + sycl::event gemv_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE beta, TYPE* y, int64_t incy, \ int64_t stridey, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, x, incx, \ stridex, beta, y, incy, stridey, batch_size, dependencies); \ } @@ -1892,12 +1888,12 @@ GEMV_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_strided_batc #undef GEMV_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, std::complex *alpha, const std::complex **a, - int64_t *lda, const std::complex **x, int64_t *incx, - std::complex *beta, std::complex **y, int64_t *incy, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, std::complex* alpha, const std::complex** a, + int64_t* lda, const std::complex** x, int64_t* incx, + std::complex* beta, std::complex** y, int64_t* incy, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { sycl::event done; int64_t stride = 0; @@ -1907,12 +1903,12 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i beta[i] = std::conj(beta[i]); if (m[i] > 0) { - done = 
queue.submit([&](sycl::handler &cgh) { - conj_vector(cgh, (std::complex **)x, m[i], incx[i], stride, group_size[i]); + done = queue.submit([&](sycl::handler& cgh) { + conj_vector(cgh, (std::complex**)x, m[i], incx[i], stride, group_size[i]); }); if (n[i] > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n[i], incy[i], stride, group_size[i]); }); } @@ -1942,7 +1938,7 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i for (int64_t i = 0; i < group_count; i++) { if (trans[i] == oneapi::mkl::transpose::conjtrans) { if (n[i] > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n[i], incy[i], stride, group_size[i]); }); } @@ -1954,10 +1950,10 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i } template -inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, const T **x, - int64_t *incx, T *beta, T **y, int64_t *incy, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event gemv_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, const T** x, + int64_t* incx, T* beta, T** y, int64_t* incy, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { auto tmp_trans = std::vector{ static_cast(group_count) }; for (int64_t i = 0; i < group_count; i++) { @@ -1979,9 +1975,9 @@ inline sycl::event gemv_batch(Func func, sycl::queue &queue, transpose *trans, i #define GEMV_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ sycl::event gemv_batch( \ - sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, TYPE *alpha, const TYPE **a, \ - int64_t *lda, const TYPE **x, int64_t *incx, TYPE *beta, TYPE **y, int64_t *incy, \ - int64_t group_count, int64_t *group_size, 
const std::vector &dependencies) { \ + sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, TYPE* alpha, const TYPE** a, \ + int64_t* lda, const TYPE** x, int64_t* incx, TYPE* beta, TYPE** y, int64_t* incy, \ + int64_t group_count, int64_t* group_size, const std::vector& dependencies) { \ return gemv_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, \ incy, group_count, group_size, dependencies); \ } @@ -1994,10 +1990,10 @@ GEMV_BATCH_LAUNCHER_USM(std::complex, rocblas_zgemv_batched) #undef GEMV_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, int64_t m, int64_t n, - const T *a, int64_t lda, int64_t stridea, const T *x, int64_t incx, - int64_t stridex, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side left_right, int64_t m, int64_t n, + const T* a, int64_t lda, int64_t stridea, const T* x, int64_t incx, + int64_t stridex, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto new_side = left_right == oneapi::mkl::side::left ? 
oneapi::mkl::side::right : oneapi::mkl::side::left; @@ -2006,10 +2002,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side left_right, in } #define DGMM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side left_right, int64_t m, int64_t n, \ - const TYPE *a, int64_t lda, int64_t stridea, const TYPE *x, \ - int64_t incx, int64_t stridex, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side left_right, int64_t m, int64_t n, \ + const TYPE* a, int64_t lda, int64_t stridea, const TYPE* x, \ + int64_t incx, int64_t stridex, TYPE* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, stridea, x, incx, \ stridex, c, ldc, stridec, batch_size, dependencies); \ } @@ -2022,10 +2018,10 @@ DGMM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_strided_batc #undef DGMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, int64_t *m, - int64_t *n, const T **a, int64_t *lda, const T **x, int64_t *incx, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event dgmm_batch(Func func, sycl::queue& queue, side* left_right, int64_t* m, + int64_t* n, const T** a, int64_t* lda, const T** x, int64_t* incx, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { const auto new_side = left_right[i] == oneapi::mkl::side::left ? 
oneapi::mkl::side::right : oneapi::mkl::side::left; @@ -2037,10 +2033,10 @@ inline sycl::event dgmm_batch(Func func, sycl::queue &queue, side *left_right, i } #define DGMM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event dgmm_batch(sycl::queue &queue, side *left_right, int64_t *m, int64_t *n, \ - const TYPE **a, int64_t *lda, const TYPE **x, int64_t *incx, TYPE **c, \ - int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event dgmm_batch(sycl::queue& queue, side* left_right, int64_t* m, int64_t* n, \ + const TYPE** a, int64_t* lda, const TYPE** x, int64_t* incx, TYPE** c, \ + int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return dgmm_batch(ROCBLAS_ROUTINE, queue, left_right, m, n, a, lda, x, incx, c, ldc, \ group_count, group_size, dependencies); \ } @@ -2053,13 +2049,13 @@ DGMM_BATCH_LAUNCHER_USM(std::complex, rocblas_zdgmm_batched) #undef DGMM_BATCH_LAUNCHER template -inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose transa, +inline sycl::event gemm_batch_strided_usm_impl(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, int64_t k, - Ts alpha, const Ta *a, int64_t lda, int64_t stridea, - const Tb *b, int64_t ldb, int64_t strideb, Ts beta, - Tc *c, int64_t ldc, int64_t stridec, + Ts alpha, const Ta* a, int64_t lda, int64_t stridea, + const Tb* b, int64_t ldb, int64_t strideb, Ts beta, + Tc* c, int64_t ldc, int64_t stridec, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -2069,11 +2065,11 @@ inline sycl::event gemm_batch_strided_usm_impl(sycl::queue &queue, transpose tra } #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t 
lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return gemm_batch_strided_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, stridea, \ b, ldb, strideb, beta, c, ldc, stridec, batch_size, \ dependencies); \ @@ -2091,11 +2087,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM #define GEMM_STRIDED_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, \ - int64_t stridea, const TYPE_B *b, int64_t ldb, int64_t strideb, \ - TYPE_S beta, TYPE_C *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, \ + int64_t stridea, const TYPE_B* b, int64_t ldb, int64_t strideb, \ + TYPE_S beta, TYPE_C* c, int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -2108,11 +2104,11 @@ GEMM_STRIDED_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, 
transpose *transb, - int64_t *m, int64_t *n, int64_t *k, Ts *alpha, const Ta **a, - int64_t *lda, const Tb **b, int64_t *ldb, Ts *beta, Tc **c, - int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event gemm_batch_usm_impl(sycl::queue& queue, transpose* transa, transpose* transb, + int64_t* m, int64_t* n, int64_t* k, Ts* alpha, const Ta** a, + int64_t* lda, const Tb** b, int64_t* ldb, Ts* beta, Tc** c, + int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { std::swap(transa[i], transb[i]); } @@ -2122,11 +2118,11 @@ inline sycl::event gemm_batch_usm_impl(sycl::queue &queue, transpose *transa, tr } #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t *ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return gemm_batch_usm_impl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc, group_count, group_size, dependencies); \ } @@ -2143,11 +2139,11 @@ GEMM_BATCH_LAUNCHER_USM(sycl::half, sycl::half, float, float) #undef GEMM_BATCH_LAUNCHER_USM #define GEMM_BATCH_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S) \ - sycl::event gemm_batch(sycl::queue &queue, transpose *transa, transpose *transb, int64_t *m, \ - int64_t *n, int64_t *k, TYPE_S *alpha, const TYPE_A **a, int64_t *lda, \ - const TYPE_B **b, int64_t 
*ldb, TYPE_S *beta, TYPE_C **c, int64_t *ldc, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event gemm_batch(sycl::queue& queue, transpose* transa, transpose* transb, int64_t* m, \ + int64_t* n, int64_t* k, TYPE_S* alpha, const TYPE_A** a, int64_t* lda, \ + const TYPE_B** b, int64_t* ldb, TYPE_S* beta, TYPE_C** c, int64_t* ldc, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ throw unimplemented("blas", "gemm_batch", \ std::string("for dtype unimplemented dtype combination <") + \ dtype_string() + "," + dtype_string() + "," + \ @@ -2160,11 +2156,11 @@ GEMM_BATCH_LAUNCHER_USM(std::int8_t, std::int8_t, std::int32_t, float) #undef GEMM_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, uplo upper_lower, +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, - const T *a, int64_t lda, int64_t stridea, T *b, int64_t ldb, + const T* a, int64_t lda, int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -2175,10 +2171,10 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side left_right, up } #define TRSM_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, int64_t strideb, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, int64_t strideb, \ + int64_t batch_size, const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); \ } @@ -2191,10 +2187,10 @@ TRSM_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_strided_batc #undef TRSM_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, uplo *upper_lower, - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, T *alpha, - const T **a, int64_t *lda, T **b, int64_t *ldb, int64_t group_count, - int64_t *group_size, const std::vector &dependencies) { +inline sycl::event trsm_batch(Func func, sycl::queue& queue, side* left_right, uplo* upper_lower, + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, T* alpha, + const T** a, int64_t* lda, T** b, int64_t* ldb, int64_t group_count, + int64_t* group_size, const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { const auto new_side = left_right[i] == oneapi::mkl::side::left ? 
oneapi::mkl::side::right : oneapi::mkl::side::left; @@ -2210,11 +2206,11 @@ inline sycl::event trsm_batch(Func func, sycl::queue &queue, side *left_right, u } #define TRSM_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm_batch(sycl::queue &queue, side *left_right, uplo *upper_lower, \ - transpose *trans, diag *unit_diag, int64_t *m, int64_t *n, TYPE *alpha, \ - const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event trsm_batch(sycl::queue& queue, side* left_right, uplo* upper_lower, \ + transpose* trans, diag* unit_diag, int64_t* m, int64_t* n, TYPE* alpha, \ + const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return trsm_batch(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, group_count, group_size, dependencies); \ } @@ -2227,10 +2223,10 @@ TRSM_BATCH_LAUNCHER_USM(std::complex, rocblas_ztrsm_batched) #undef TRSM_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, transpose *trans, - int64_t *n, int64_t *k, T *alpha, const T **a, int64_t *lda, T *beta, - T **c, int64_t *ldc, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo* upper_lower, transpose* trans, + int64_t* n, int64_t* k, T* alpha, const T** a, int64_t* lda, T* beta, + T** c, int64_t* ldc, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { for (int64_t i = 0; i < group_count; i++) { const auto new_uplo = upper_lower[i] == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2247,10 +2243,10 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo *upper_lower, } #define SYRK_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo *upper_lower, transpose *trans, int64_t *n, \ - int64_t *k, TYPE *alpha, const TYPE **a, int64_t *lda, TYPE *beta, \ - TYPE **c, int64_t *ldc, int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event syrk_batch(sycl::queue& queue, uplo* upper_lower, transpose* trans, int64_t* n, \ + int64_t* k, TYPE* alpha, const TYPE** a, int64_t* lda, TYPE* beta, \ + TYPE** c, int64_t* ldc, int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, \ c, ldc, group_count, group_size, dependencies); \ } @@ -2263,10 +2259,10 @@ SYRK_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_batched) #undef SYRK_BATCH_LAUNCHER_USM template -inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event syrk_batch(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2277,11 +2273,11 @@ inline sycl::event syrk_batch(Func func, sycl::queue &queue, uplo upper_lower, t } #define SYRK_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk_batch(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, \ - int64_t k, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, TYPE *c, int64_t ldc, \ + sycl::event syrk_batch(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, \ + int64_t k, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, TYPE* c, int64_t ldc, \ int64_t stridec, int64_t batch_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syrk_batch(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, \ stridea, beta, c, ldc, stridec, batch_size, dependencies); \ } @@ -2294,20 +2290,20 @@ SYRK_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zsyrk_strided_batc #undef SYRK_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, T *b, int64_t ldb, int64_t strideb, +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, T* b, int64_t ldb, int64_t strideb, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, stridea, b, ldb, strideb, batch_size, dependencies); } #define OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, int64_t stridea, \ - TYPE *b, int64_t ldb, int64_t strideb, int64_t batch_size, \ - const std::vector &dependencies) { \ + 
sycl::event omatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, int64_t stridea, \ + TYPE* b, int64_t ldb, int64_t strideb, int64_t batch_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, stridea, b, ldb, \ strideb, batch_size, dependencies); \ } @@ -2319,49 +2315,49 @@ OMATCOPY_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_ #undef OMATCOPY_STRIDED_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major 
layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, +sycl::event imatcopy_batch(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, int64_t stride, int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } template -inline sycl::event omatadd_batch(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - int64_t stridea, const T beta, const T *b, int64_t ldb, - int64_t strideb, T *c, int64_t ldc, int64_t stridec, - int64_t batch_size, const std::vector &dependencies) { +inline sycl::event omatadd_batch(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + int64_t stridea, const T beta, const T* b, int64_t ldb, + int64_t strideb, T* c, int64_t ldc, int64_t stridec, + int64_t batch_size, const std::vector& dependencies) { return column_major::omatadd_batch(func, queue, transa, transb, n, m, alpha, a, lda, stridea, beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); } #define OMATADD_STRIDED_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd_batch(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, \ - int64_t stridea, const TYPE beta, const TYPE *b, int64_t ldb, \ - int64_t strideb, TYPE *c, int64_t ldc, int64_t stridec, \ - int64_t batch_size, const std::vector &dependencies) { \ + sycl::event omatadd_batch(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, \ + int64_t stridea, const TYPE beta, const TYPE* b, int64_t ldb, \ + int64_t strideb, TYPE* c, 
int64_t ldc, int64_t stridec, \ + int64_t batch_size, const std::vector& dependencies) { \ return omatadd_batch(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, stridea, \ beta, b, ldb, strideb, c, ldc, stridec, batch_size, dependencies); \ } @@ -2374,19 +2370,19 @@ OMATADD_STRIDED_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_strided_b #undef OMATADD_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event omatcopy_batch(Func func, sycl::queue &queue, transpose *trans, int64_t *m, - int64_t *n, T *alpha, const T **a, int64_t *lda, T **b, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +inline sycl::event omatcopy_batch(Func func, sycl::queue& queue, transpose* trans, int64_t* m, + int64_t* n, T* alpha, const T** a, int64_t* lda, T** b, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { return column_major::omatcopy_batch(func, queue, trans, n, m, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } #define OMATCOPY_BATCH_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, \ - TYPE *alpha, const TYPE **a, int64_t *lda, TYPE **b, int64_t *ldb, \ - int64_t group_count, int64_t *group_size, \ - const std::vector &dependencies) { \ + sycl::event omatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, \ + TYPE* alpha, const TYPE** a, int64_t* lda, TYPE** b, int64_t* ldb, \ + int64_t group_count, int64_t* group_size, \ + const std::vector& dependencies) { \ return omatcopy_batch(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, \ group_count, group_size, dependencies); \ } @@ -2398,31 +2394,31 @@ OMATCOPY_BATCH_LAUNCHER_USM(std::complex, rocblas_zgeam_batched) #undef OMATCOPY_BATCH_LAUNCHER_USM -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - float *alpha, float **ab, int64_t *lda, int64_t *ldb, - int64_t 
group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + float* alpha, float** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - double *alpha, double **ab, int64_t *lda, int64_t *ldb, - int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + double* alpha, double** ab, int64_t* lda, int64_t* ldb, + int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major layout"); } -sycl::event imatcopy_batch(sycl::queue &queue, transpose *trans, int64_t *m, int64_t *n, - std::complex *alpha, std::complex **ab, int64_t *lda, - int64_t *ldb, int64_t group_count, int64_t *group_size, - const std::vector &dependencies) { +sycl::event imatcopy_batch(sycl::queue& queue, transpose* trans, int64_t* m, int64_t* n, + std::complex* alpha, std::complex** ab, int64_t* lda, + int64_t* ldb, int64_t group_count, int64_t* group_size, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy_batch", "for row_major 
layout"); } diff --git a/src/blas/backends/rocblas/rocblas_extensions.cpp b/src/blas/backends/rocblas/rocblas_extensions.cpp index a1fd1df1c..5fa5b61aa 100644 --- a/src/blas/backends/rocblas/rocblas_extensions.cpp +++ b/src/blas/backends/rocblas/rocblas_extensions.cpp @@ -33,65 +33,65 @@ namespace column_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + 
int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose 
transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +inline void omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); @@ -100,26 +100,26 @@ inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? 
n : m; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(trans), get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, (rocDataType *)&beta, nullptr, + (rocDataType*)&alpha, a_, lda, (rocDataType*)&beta, nullptr, lda, b_, ldb); }); }); } #define OMATCOPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -131,16 +131,16 @@ OMATCOPY_LAUNCHER(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t 
lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, \ b, ldb, strideb); \ } @@ -152,55 +152,55 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -inline void omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - 
int64_t n, const T alpha, sycl::buffer &a, int64_t lda, const T beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +inline void omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, const T alpha, sycl::buffer& a, int64_t lda, const T beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, b_, ldb, c_, ldc); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, + lda, (rocDataType*)&beta, b_, ldb, c_, ldc); }); }); } #define OMATADD_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - const TYPE alpha, sycl::buffer &a, int64_t lda, const TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, const TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { \ omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, \ ldc); \ } @@ -214,72 +214,72 
@@ OMATADD_LAUNCHER(std::complex, rocblas_zgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + 
uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double 
beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for column_major layout"); } template -inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); @@ -287,17 +287,17 @@ inline sycl::event 
omatcopy(Func func, sycl::queue &queue, transpose trans, int6 const int64_t new_m = trans == oneapi::mkl::transpose::nontrans ? m : n; const int64_t new_n = trans == oneapi::mkl::transpose::nontrans ? n : m; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(trans), get_rocblas_operation(trans), new_m, new_n, - (rocDataType *)&alpha, a_, lda, (rocDataType *)&beta, nullptr, + (rocDataType*)&alpha, a_, lda, (rocDataType*)&beta, nullptr, lda, b_, ldb); }); }); @@ -306,9 +306,9 @@ inline sycl::event omatcopy(Func func, sycl::queue &queue, transpose trans, int6 } #define OMATCOPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -320,16 +320,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, 
Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -341,50 +341,50 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { 
+sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for column_major layout"); } template -inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - const T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + const T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, (rocDataType *)&alpha, a_, - lda, 
(rocDataType *)&beta, b_, ldb, c_, ldc); + get_rocblas_operation(transb), m, n, (rocDataType*)&alpha, a_, + lda, (rocDataType*)&beta, b_, ldb, c_, ldc); }); }); @@ -392,10 +392,10 @@ inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, tran } #define OMATADD_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, const TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, const TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, \ c, ldc, dependencies); \ } @@ -413,72 +413,72 @@ namespace row_major { // Buffer APIs -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - int8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, 
transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + int8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, int64_t m, - int64_t n, int64_t k, float alpha, sycl::buffer &a, int64_t lda, - uint8_t ao, sycl::buffer &b, int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, int64_t ldc, sycl::buffer &co) { +void gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, int64_t m, + int64_t n, int64_t k, float alpha, sycl::buffer& a, int64_t lda, + uint8_t ao, sycl::buffer& b, int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, int64_t ldc, sycl::buffer& co) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, float alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, float beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, float alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, float beta, 
sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, double alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, double beta, sycl::buffer &c, +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, double alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, double beta, sycl::buffer& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -void gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, - int64_t k, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &b, int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, int64_t ldc) { +void gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, int64_t n, + int64_t k, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& b, int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, int64_t ldc) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -inline void omatcopy(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - const T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, +inline void omatcopy(Func func, sycl::queue& queue, transpose trans, 
int64_t m, int64_t n, + const T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { column_major::omatcopy(func, queue, trans, n, m, alpha, a, lda, b, ldb); } #define OMATCOPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, const TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& b, int64_t ldb) { \ omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb); \ } @@ -490,16 +490,16 @@ OMATCOPY_LAUNCHER(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER template -void omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, std::int64_t stridea, - sycl::buffer &b, int64_t ldb, std::int64_t strideb) { +void omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, std::int64_t stridea, + sycl::buffer& b, int64_t ldb, std::int64_t strideb) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, int64_t stridea, \ - sycl::buffer &b, int64_t ldb, int64_t strideb) { \ + void omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, int64_t stridea, \ + sycl::buffer& b, int64_t ldb, int64_t strideb) { \ omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, lda, \ b, ldb, strideb); \ } @@ -511,37 +511,37 @@ OMATCOPY2_LAUNCHER(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - sycl::buffer &ab, int64_t 
lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - sycl::buffer &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + sycl::buffer& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -void imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, int64_t lda, int64_t ldb) { +void imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& ab, int64_t lda, int64_t ldb) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline void omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, const T alpha, sycl::buffer &a, int64_t lda, const T beta, - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { +inline void omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, const T alpha, sycl::buffer& a, int64_t lda, const T beta, + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { column_major::omatadd(func, queue, transa, transb, n, m, alpha, a, lda, beta, b, ldb, c, ldc); } #define OMATADD_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ 
- const TYPE alpha, sycl::buffer &a, int64_t lda, const TYPE beta, \ - sycl::buffer &b, int64_t ldb, sycl::buffer &c, int64_t ldc) { \ + void omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + const TYPE alpha, sycl::buffer& a, int64_t lda, const TYPE beta, \ + sycl::buffer& b, int64_t ldb, sycl::buffer& c, int64_t ldc) { \ omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, \ ldc); \ } @@ -555,79 +555,79 @@ OMATADD_LAUNCHER(std::complex, rocblas_zgeam) // USM APIs -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const int8_t *a, int64_t lda, - int8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const int8_t* a, int64_t lda, + int8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event 
gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const int8_t *b, int64_t ldb, int8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const int8_t* b, int64_t ldb, int8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemm_bias(sycl::queue &queue, transpose transa, transpose transb, offset offsetc, - int64_t m, int64_t n, int64_t k, float alpha, const uint8_t *a, int64_t lda, - uint8_t ao, const uint8_t *b, int64_t ldb, uint8_t bo, float beta, int32_t *c, - int64_t ldc, const int32_t *co, - const std::vector &dependencies) { +sycl::event gemm_bias(sycl::queue& queue, transpose transa, transpose transb, offset offsetc, + int64_t m, int64_t n, int64_t k, float alpha, const uint8_t* a, int64_t lda, + uint8_t ao, const uint8_t* b, int64_t ldb, uint8_t bo, float beta, int32_t* c, + int64_t ldc, const int32_t* co, + const std::vector& dependencies) { throw unimplemented("blas", "gemm_bias", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, float alpha, const float *a, int64_t lda, const float *b, - int64_t ldb, float beta, float *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, float alpha, const float* a, int64_t lda, const float* b, + int64_t ldb, float beta, float* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major 
layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } -sycl::event gemmt(sycl::queue &queue, uplo upper_lower, transpose transa, transpose transb, - int64_t n, int64_t k, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *b, int64_t ldb, - std::complex beta, std::complex *c, int64_t ldc, - const std::vector &dependencies) { +sycl::event gemmt(sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, + int64_t n, int64_t k, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* b, int64_t ldb, + std::complex beta, std::complex* c, int64_t ldc, + const std::vector& dependencies) { throw unimplemented("blas", "gemmt", "for row_major layout"); } template -inline sycl::event omatcopy(Func func, sycl::queue 
&queue, transpose trans, int64_t m, int64_t n, - const T alpha, const T *a, int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event omatcopy(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + const T alpha, const T* a, int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { return column_major::omatcopy(func, queue, trans, n, m, alpha, a, lda, b, ldb, dependencies); } #define OMATCOPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, \ - const TYPE alpha, const TYPE *a, int64_t lda, TYPE *b, int64_t ldb, \ - const std::vector &dependencies) { \ + sycl::event omatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, \ + const TYPE alpha, const TYPE* a, int64_t lda, TYPE* b, int64_t ldb, \ + const std::vector& dependencies) { \ return omatcopy(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); \ } @@ -639,16 +639,16 @@ OMATCOPY_LAUNCHER_USM(std::complex, rocblas_zgeam) #undef OMATCOPY_LAUNCHER_USM template -sycl::event omatcopy2(const char *func_name, Func func, sycl::queue &queue, transpose trans, - int64_t m, int64_t n, T alpha, const T *a, int64_t lda, int64_t stridea, T *b, - int64_t ldb, int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(const char* func_name, Func func, sycl::queue& queue, transpose trans, + int64_t m, int64_t n, T alpha, const T* a, int64_t lda, int64_t stridea, T* b, + int64_t ldb, int64_t strideb, const std::vector& dependencies) { throw unimplemented("blas", "omatcopy2", ""); } #define OMATCOPY2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatcopy2(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, int64_t stridea, TYPE *b, int64_t ldb, \ - int64_t strideb, const std::vector &dependencies) { \ + sycl::event omatcopy2(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + 
const TYPE* a, int64_t lda, int64_t stridea, TYPE* b, int64_t ldb, \ + int64_t strideb, const std::vector& dependencies) { \ return omatcopy2(#ROCBLAS_ROUTINE, ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, stridea, \ lda, b, ldb, strideb, dependencies); \ } @@ -660,44 +660,44 @@ OMATCOPY2_LAUNCHER_USM(std::complex, "unimplemented") #undef OMATCOPY2_LAUNCHER_USM -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, float alpha, - float *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, float alpha, + float* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, double alpha, - double *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, double alpha, + double* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw unimplemented("blas", "imatcopy", "for row_major layout"); } -sycl::event imatcopy(sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, std::complex *ab, int64_t lda, int64_t ldb, - const std::vector &dependencies) { +sycl::event imatcopy(sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, std::complex* ab, int64_t lda, int64_t ldb, + const std::vector& dependencies) { throw 
unimplemented("blas", "imatcopy", "for row_major layout"); } template -inline sycl::event omatadd(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, const T alpha, const T *a, int64_t lda, - const T beta, const T *b, int64_t ldb, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event omatadd(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, const T alpha, const T* a, int64_t lda, + const T beta, const T* b, int64_t ldb, T* c, int64_t ldc, + const std::vector& dependencies) { return column_major::omatadd(func, queue, transa, transb, n, m, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } #define OMATADD_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event omatadd(sycl::queue &queue, transpose transa, transpose transb, int64_t m, \ - int64_t n, const TYPE alpha, const TYPE *a, int64_t lda, const TYPE beta, \ - const TYPE *b, int64_t ldb, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event omatadd(sycl::queue& queue, transpose transa, transpose transb, int64_t m, \ + int64_t n, const TYPE alpha, const TYPE* a, int64_t lda, const TYPE beta, \ + const TYPE* b, int64_t ldb, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return omatadd(ROCBLAS_ROUTINE, queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, \ c, ldc, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_handle.hpp b/src/blas/backends/rocblas/rocblas_handle.hpp index 7a8dfe91f..cde400bfb 100644 --- a/src/blas/backends/rocblas/rocblas_handle.hpp +++ b/src/blas/backends/rocblas/rocblas_handle.hpp @@ -30,10 +30,10 @@ namespace rocblas { template struct rocblas_handle_ { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_mapper_{}; ~rocblas_handle_() noexcept(false) { - for (auto &handle_pair : rocblas_handle_mapper_) { + for (auto& handle_pair : 
rocblas_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/blas/backends/rocblas/rocblas_helper.hpp b/src/blas/backends/rocblas/rocblas_helper.hpp index 7fd45bdca..e28139ef3 100644 --- a/src/blas/backends/rocblas/rocblas_helper.hpp +++ b/src/blas/backends/rocblas/rocblas_helper.hpp @@ -77,7 +77,7 @@ void overflow_check(Index index, Next... indices) { class rocblas_error : virtual public std::runtime_error { protected: - inline const char *rocblas_error_map(rocblas_status error) { + inline const char* rocblas_error_map(rocblas_status error) { switch (error) { case rocblas_status_success: return "rocblas_status_success"; case rocblas_status_invalid_handle: return "rocblas_status_invalid_handle"; @@ -124,7 +124,7 @@ class rocblas_error : virtual public std::runtime_error { class hip_error : virtual public std::runtime_error { protected: - inline const char *hip_error_map(hipError_t result) { + inline const char* hip_error_map(hipError_t result) { return hipGetErrorName(result); } int error_number; ///< error number @@ -174,12 +174,12 @@ class hip_error : virtual public std::runtime_error { HIP_ERROR_FUNC(hipStreamSynchronize, hip_err, currentStreamId); template -inline void rocblas_native_func(Func func, rocblas_status err, - rocblas_handle handle, Types... args) { +inline void rocblas_native_func(Func func, rocblas_status err, rocblas_handle handle, + Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - ROCBLAS_ERROR_FUNC(func, err, handle, args...) + ROCBLAS_ERROR_FUNC(func, err, handle, args...) #else - ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) + ROCBLAS_ERROR_FUNC_SYNC(func, err, handle, args...) 
#endif }; diff --git a/src/blas/backends/rocblas/rocblas_level1.cpp b/src/blas/backends/rocblas/rocblas_level1.cpp index 9a4f509dc..0eb5489df 100644 --- a/src/blas/backends/rocblas/rocblas_level1.cpp +++ b/src/blas/backends/rocblas/rocblas_level1.cpp @@ -34,16 +34,16 @@ namespace column_major { // Buffer APIs template -inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void asum(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on buffer, it must be set to @@ -51,8 +51,8 @@ inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; // ASUM does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -65,8 +65,8 @@ inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define ASUM_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -78,26 +78,26 @@ ASUM_LAUNCHER(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER template -inline void scal(Func func, sycl::queue &queue, int64_t n, T1 a, sycl::buffer &x, +inline void scal(Func func, sycl::queue& queue, int64_t n, T1 a, sycl::buffer& x, int64_t incx) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; // SCAL does not support negative incx - rocblas_native_func(func, err, handle, n, (rocDataType1 *)&a, x_, std::abs(incx)); + rocblas_native_func(func, err, handle, n, (rocDataType1*)&a, x_, std::abs(incx)); }); }); } #define SCAL_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t 
incx) { \ scal(ROCBLAS_ROUTINE, queue, n, a, x, incx); \ } @@ -111,29 +111,28 @@ SCAL_LAUNCHER(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER template -inline void axpy(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, y_, - incy); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, y_, incy); }); }); } #define AXPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -144,40 +143,40 @@ AXPY_LAUNCHER(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void 
axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, sycl::buffer &s) { +inline void rotg(Func func, sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto s_acc = s.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on buffer, it 
must be set to @@ -185,10 +184,10 @@ inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::bu // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); - auto s_ = sc.get_mem(s_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); + auto s_ = sc.get_mem(s_acc); rocblas_status err; rocblas_native_func(func, err, handle, a_, b_, c_, s_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -200,8 +199,8 @@ inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::bu } #define ROTG_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(ROCBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -213,16 +212,16 @@ ROTG_LAUNCHER(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER template -inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +inline void rotm(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the 
pointer mode is the rocblas_pointer_mode_host @@ -231,9 +230,9 @@ inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto param_ = sc.get_mem(param_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto param_ = sc.get_mem(param_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, param_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -245,8 +244,8 @@ inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define ROTM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -256,19 +255,19 @@ ROTM_LAUNCHER(double, rocblas_drotm) #undef ROTM_LAUNCHER template -inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void copy(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); 
+ auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -276,8 +275,8 @@ inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define COPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -289,16 +288,16 @@ COPY_LAUNCHER(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER template -inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +inline void dot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -307,9 +306,9 @@ inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -321,8 +320,8 @@ inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, } #define DOT_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } @@ -335,23 +334,23 @@ DOT_LAUNCHER(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &y, int64_t incy, T2 c, T3 s) { +inline void rot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; using rocDataType3 = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - 
onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host // when the data is on buffer, it must be set to @@ -359,18 +358,18 @@ inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. // rocblas_set_pointer_mode(handle, rocblas_set_pointer_mode); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2 *)&c, - (rocDataType3 *)&s); + rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2*)&c, + (rocDataType3*)&s); }); }); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -381,16 +380,16 @@ ROT_LAUNCHER(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { overflow_check(n, incx, incy); // rocBLAS does not support sdot so we need to mimic sdot. 
- queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.get_access(cgh); auto y_acc = y.get_access(cgh); auto res_acc = result.get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -399,9 +398,9 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; rocblas_native_func(rocblas_sdot, err, handle, n, x_, incx, y_, incy, res_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -417,18 +416,18 @@ void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, } template -inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(Func func, sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, T y1, sycl::buffer& param) { using rocDataType = typename RocEquivalentType::Type; sycl::buffer y1_buff(&y1, sycl::range<1>(1)); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto d1_acc = d1.template get_access(cgh); auto d2_acc = d2.template get_access(cgh); auto x1_acc = x1.template get_access(cgh); auto y1_acc = y1_buff.template get_access(cgh); auto param_acc = param.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& 
sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -437,11 +436,11 @@ inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::b // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto d1_ = sc.get_mem(d1_acc); - auto d2_ = sc.get_mem(d2_acc); - auto x1_ = sc.get_mem(x1_acc); - auto y1_ = sc.get_mem(y1_acc); - auto param_ = sc.get_mem(param_acc); + auto d1_ = sc.get_mem(d1_acc); + auto d2_ = sc.get_mem(d2_acc); + auto x1_ = sc.get_mem(x1_acc); + auto y1_ = sc.get_mem(y1_acc); + auto param_ = sc.get_mem(param_acc); rocblas_status err; rocblas_native_func(func, err, handle, d1_, d2_, x1_, y1_, param_); // Higher level BLAS functions expect rocblas_pointer_mode_host @@ -453,8 +452,8 @@ inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::b } #define ROTMG_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -464,8 +463,8 @@ ROTMG_LAUNCHER(double, rocblas_drotmg) #undef ROTMG_LAUNCHER template -inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamax(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); @@ -477,10 +476,10 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. 
sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -489,8 +488,8 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); rocblas_status err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference netlib BLAS. 
@@ -502,7 +501,7 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -511,8 +510,8 @@ inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define IAMAX_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -524,19 +523,19 @@ IAMAX_LAUNCHER(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER template -inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void swap(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -544,8 +543,8 @@ inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x } #define SWAP_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void 
swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -557,8 +556,8 @@ SWAP_LAUNCHER(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER template -inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamin(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); @@ -570,10 +569,10 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // to elementwise copy the data between two buffer, or allow reinterpret cast // to convert to different type with different typesize size. sycl::buffer int_res_buff{ sycl::range<1>(1) }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto int_res_acc = int_res_buff.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -582,8 +581,8 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto int_res_ = sc.get_mem(int_res_acc); + auto x_ = sc.get_mem(x_acc); + auto int_res_ = sc.get_mem(int_res_acc); rocblas_status err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented as a reference IAMIN. 
@@ -595,7 +594,7 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto int_res_acc = int_res_buff.template get_access(cgh); auto result_acc = result.template get_access(cgh); cgh.single_task( @@ -604,8 +603,8 @@ inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define IAMIN_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -617,16 +616,16 @@ IAMIN_LAUNCHER(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER template -inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void nrm2(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto x_acc = x.template get_access(cgh); auto res_acc = result.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); // By default the pointer mode is the rocblas_pointer_mode_host @@ -635,8 +634,8 @@ inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer & // fault. When it is set to device it is users responsibility to // synchronise as the function is completely asynchronous. 
rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = sc.get_mem(x_acc); - auto res_ = sc.get_mem(res_acc); + auto x_ = sc.get_mem(x_acc); + auto res_ = sc.get_mem(res_acc); rocblas_status err; // NRM2 does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -649,8 +648,8 @@ inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer & } #define NRM2_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -664,20 +663,20 @@ NRM2_LAUNCHER(std::complex, double, rocblas_dznrm2) // USM APIs template -inline sycl::event asum(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event asum(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); rocblas_status err; // ASUM does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -689,8 +688,8 @@ inline sycl::event asum(Func func, 
sycl::queue &queue, int64_t n, const T1 *x, c } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -702,21 +701,21 @@ ASUM_LAUNCHER_USM(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event scal(Func func, sycl::queue& queue, int64_t n, T1 a, T2* x, int64_t incx, + const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); + auto x_ = reinterpret_cast(x); rocblas_status err; // SCAL does not support negative incx - rocblas_native_func(func, err, handle, n, (rocDataType1 *)&a, x_, std::abs(incx)); + rocblas_native_func(func, err, handle, n, (rocDataType1*)&a, x_, std::abs(incx)); }); }); @@ -724,8 +723,8 @@ inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, i } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& 
dependencies) { \ return scal(ROCBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } @@ -739,21 +738,20 @@ SCAL_LAUNCHER_USM(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER_USM template -inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, int64_t incx, - T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event axpy(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, int64_t incx, + T* y, int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, (rocDataType *)&alpha, x_, incx, y_, - incy); + rocblas_native_func(func, err, handle, n, (rocDataType*)&alpha, x_, incx, y_, incy); }); }); @@ -761,8 +759,8 @@ inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const } #define AXPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, dependencies); \ } @@ -773,44 +771,44 @@ AXPY_LAUNCHER_USM(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t 
incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for column_major layout"); } template -inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 *s, - const std::vector &dependencies) { +inline sycl::event rotg(Func func, sycl::queue& queue, T1* a, T1* b, T2* c, T1* s, + const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename 
RocEquivalentType::Type; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); - auto s_ = reinterpret_cast(s); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); rocblas_status err; rocblas_native_func(func, err, handle, a_, b_, c_, s_); }); @@ -820,8 +818,8 @@ inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 * } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(ROCBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -833,19 +831,19 @@ ROTG_LAUNCHER_USM(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, T *param, const std::vector &dependencies) { +inline sycl::event rotm(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, T* param, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - 
auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto param_ = reinterpret_cast(param); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto param_ = reinterpret_cast(param); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, param_); }); @@ -855,8 +853,8 @@ inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t } #define ROTM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param, dependencies); \ } @@ -866,18 +864,18 @@ ROTM_LAUNCHER_USM(double, rocblas_drotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event copy(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -887,8 +885,8 @@ inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, in } #define 
COPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -900,20 +898,20 @@ COPY_LAUNCHER_USM(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + const T* y, int64_t incy, T* result, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, res_); }); @@ -923,9 +921,9 @@ inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, con } #define DOT_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t 
incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result, dependencies); \ } @@ -938,29 +936,29 @@ DOT_LAUNCHER_USM(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER_USM -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for column_major layout"); } template -inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 *x, const int64_t incx, T1 *y, - int64_t incy, T2 c, T3 s, const std::vector &dependencies) { +inline sycl::event rot(Func func, sycl::queue& queue, int64_t n, T1* x, const int64_t incx, T1* y, + int64_t incy, T2 c, T3 s, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; using rocDataType3 = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2 *)&c, - (rocDataType3 *)&s); + rocblas_native_func(func, err, handle, n, x_, incx, y_, incy, (rocDataType2*)&c, + (rocDataType3*)&s); }); }); @@ -968,9 +966,9 @@ inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 *x, const in } 
#define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, dependencies); \ } @@ -981,20 +979,20 @@ ROT_LAUNCHER_USM(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, float* result, + const std::vector& dependencies) { overflow_check(n, incx, incy); // rocBLAS does not support sdot so we need to mimic sdot. - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); + auto res_ = reinterpret_cast(result); rocblas_status err; rocblas_native_func(rocblas_sdot, err, handle, n, x_, incx, y_, incy, res_); }); @@ -1006,20 +1004,20 @@ sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int6 } template -inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y1, T *param, - const std::vector &dependencies) { +inline sycl::event rotmg(Func func, sycl::queue& queue, T* d1, T* d2, T* x1, T y1, T* param, + const std::vector& 
dependencies) { using rocDataType = typename RocEquivalentType::Type; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto d1_ = reinterpret_cast(d1); - auto d2_ = reinterpret_cast(d2); - auto x1_ = reinterpret_cast(x1); - auto y1_ = reinterpret_cast(&y1); - auto param_ = reinterpret_cast(param); + auto d1_ = reinterpret_cast(d1); + auto d2_ = reinterpret_cast(d2); + auto x1_ = reinterpret_cast(x1); + auto y1_ = reinterpret_cast(&y1); + auto param_ = reinterpret_cast(param); rocblas_status err; rocblas_native_func(func, err, handle, d1_, d2_, x1_, y1_, param_); }); @@ -1029,8 +1027,8 @@ inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y } #define ROTMG_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1040,8 +1038,8 @@ ROTMG_LAUNCHER_USM(double, rocblas_drotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamax(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); // rocBLAS does not support int64_t as return type for the data by default. 
So we need to @@ -1049,17 +1047,17 @@ inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, c // it back to the actual data on the host. // This change may cause failure as the result of integer overflow // based on the size. - auto int_res_p = (int *)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), - queue.get_context()); + auto int_res_p = (int*)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), + queue.get_context()); *int_res_p = 0; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto int_res_p_ = reinterpret_cast(int_res_p); + auto x_ = reinterpret_cast(x); + auto int_res_p_ = reinterpret_cast(int_res_p); rocblas_status err; // For negative incx, iamax returns 0. This behaviour is similar to that of // reference iamax. 
@@ -1074,8 +1072,8 @@ inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, c } #define IAMAX_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1087,18 +1085,18 @@ IAMAX_LAUNCHER_USM(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event swap(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, n, x_, incx, y_, incy); }); @@ -1108,8 +1106,8 @@ inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t } #define SWAP_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return 
swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1121,8 +1119,8 @@ SWAP_LAUNCHER_USM(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamin(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); // rocBLAS does not support int64_t as return type for the data by default. So we need to @@ -1130,18 +1128,18 @@ inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, c // it back to the actual data on the host. // This change may cause failure as the result of integer overflow // based on the size. - auto int_res_p = (int *)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), - queue.get_context()); + auto int_res_p = (int*)sycl::aligned_alloc_shared(64, sizeof(rocblas_int), queue.get_device(), + queue.get_context()); *int_res_p = 0; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto int_res_p_ = reinterpret_cast(int_res_p); + auto x_ = reinterpret_cast(x); + auto int_res_p_ = reinterpret_cast(int_res_p); rocblas_status err; // For negative incx, iamin returns 0. This behaviour is similar to that of // implemented iamin. 
@@ -1156,8 +1154,8 @@ inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, c } #define IAMIN_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1169,20 +1167,20 @@ IAMIN_LAUNCHER_USM(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event nrm2(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { using rocDataType1 = typename RocEquivalentType::Type; using rocDataType2 = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); rocblas_set_pointer_mode(handle, rocblas_pointer_mode_device); - auto x_ = reinterpret_cast(x); - auto res_ = reinterpret_cast(result); + auto x_ = reinterpret_cast(x); + auto res_ = reinterpret_cast(result); rocblas_status err; // NRM2 does not support negative index rocblas_native_func(func, err, handle, n, x_, std::abs(incx), res_); @@ -1194,8 +1192,8 @@ inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, c } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, 
const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1212,14 +1210,14 @@ namespace row_major { // Buffer APIs template -inline void asum(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void asum(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::asum(func, queue, n, x, incx, result); } #define ASUM_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void asum(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void asum(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ asum(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1231,13 +1229,13 @@ ASUM_LAUNCHER(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER template -inline void scal(Func func, sycl::queue &queue, int64_t n, T1 a, sycl::buffer &x, +inline void scal(Func func, sycl::queue& queue, int64_t n, T1 a, sycl::buffer& x, int64_t incx) { column_major::scal(func, queue, n, a, x, incx); } #define SCAL_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void scal(sycl::queue &queue, int64_t n, TYPE1 a, sycl::buffer &x, int64_t incx) { \ + void scal(sycl::queue& queue, int64_t n, TYPE1 a, sycl::buffer& x, int64_t incx) { \ scal(ROCBLAS_ROUTINE, queue, n, a, x, incx); \ } @@ -1251,14 +1249,14 @@ SCAL_LAUNCHER(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER template -inline void axpy(Func func, sycl::queue &queue, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy) { +inline void axpy(Func func, sycl::queue& queue, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy) { column_major::axpy(func, queue, n, alpha, x, incx, y, 
incy); } #define AXPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void axpy(sycl::queue &queue, int64_t n, TYPE alpha, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void axpy(sycl::queue& queue, int64_t n, TYPE alpha, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy); \ } @@ -1269,37 +1267,37 @@ AXPY_LAUNCHER(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER -void axpby(sycl::queue &queue, int64_t n, float alpha, sycl::buffer &x, int64_t incx, - float beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, float alpha, sycl::buffer& x, int64_t incx, + float beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, double alpha, sycl::buffer &x, int64_t incx, - double beta, sycl::buffer &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, double alpha, sycl::buffer& x, int64_t incx, + double beta, sycl::buffer& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } -void axpby(sycl::queue &queue, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +void axpby(sycl::queue& queue, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline void rotg(Func func, sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, - sycl::buffer &c, 
sycl::buffer &s) { +inline void rotg(Func func, sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, + sycl::buffer& c, sycl::buffer& s) { column_major::rotg(func, queue, a, b, c, s); } #define ROTG_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void rotg(sycl::queue &queue, sycl::buffer &a, sycl::buffer &b, \ - sycl::buffer &c, sycl::buffer &s) { \ + void rotg(sycl::queue& queue, sycl::buffer& a, sycl::buffer& b, \ + sycl::buffer& c, sycl::buffer& s) { \ rotg(ROCBLAS_ROUTINE, queue, a, b, c, s); \ } @@ -1311,14 +1309,14 @@ ROTG_LAUNCHER(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER template -inline void rotm(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { +inline void rotm(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& param) { column_major::rotm(func, queue, n, x, incx, y, incy, param); } #define ROTM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotm(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy, sycl::buffer ¶m) { \ + void rotm(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy, sycl::buffer& param) { \ rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param); \ } @@ -1328,14 +1326,14 @@ ROTM_LAUNCHER(double, rocblas_drotm) #undef ROTM_LAUNCHER template -inline void copy(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void copy(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { column_major::copy(func, queue, n, x, incx, y, incy); } #define COPY_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void copy(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void copy(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t 
incy) { \ copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1347,14 +1345,14 @@ COPY_LAUNCHER(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER template -inline void dot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +inline void dot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { column_major::dot(func, queue, n, x, incx, y, incy, result); } #define DOT_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void dot##EXT(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, const int64_t incy, sycl::buffer &result) { \ + void dot##EXT(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& y, const int64_t incy, sycl::buffer& result) { \ dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result); \ } @@ -1367,20 +1365,20 @@ DOT_LAUNCHER(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER -void dot(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void dot(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline void rot(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &y, int64_t incy, T2 c, T3 s) { +inline void rot(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& y, int64_t incy, T2 c, T3 s) { column_major::rot(func, queue, n, x, incx, y, incy, c, s); } #define ROT_LAUNCHER(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - void rot(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &y, int64_t incy, TYPE2 c, TYPE3 s) { \ + void rot(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + 
sycl::buffer& y, int64_t incy, TYPE2 c, TYPE3 s) { \ rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s); \ } @@ -1391,20 +1389,20 @@ ROT_LAUNCHER(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER -void sdsdot(sycl::queue &queue, int64_t n, float sb, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy, sycl::buffer &result) { +void sdsdot(sycl::queue& queue, int64_t n, float sb, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy, sycl::buffer& result) { column_major::sdsdot(queue, n, sb, x, incx, y, incy, result); } template -inline void rotmg(Func func, sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, - sycl::buffer &x1, T y1, sycl::buffer ¶m) { +inline void rotmg(Func func, sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, + sycl::buffer& x1, T y1, sycl::buffer& param) { column_major::rotmg(func, queue, d1, d2, x1, y1, param); } #define ROTMG_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void rotmg(sycl::queue &queue, sycl::buffer &d1, sycl::buffer &d2, \ - sycl::buffer &x1, TYPE y1, sycl::buffer ¶m) { \ + void rotmg(sycl::queue& queue, sycl::buffer& d1, sycl::buffer& d2, \ + sycl::buffer& x1, TYPE y1, sycl::buffer& param) { \ rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param); \ } @@ -1414,14 +1412,14 @@ ROTMG_LAUNCHER(double, rocblas_drotmg) #undef ROTMG_LAUNCHER template -inline void iamax(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamax(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::iamax(func, queue, n, x, incx, result); } #define IAMAX_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamax(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamax(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1433,14 +1431,14 @@ 
IAMAX_LAUNCHER(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER template -inline void swap(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, - sycl::buffer &y, int64_t incy) { +inline void swap(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, + sycl::buffer& y, int64_t incy) { column_major::swap(func, queue, n, x, incx, y, incy); } #define SWAP_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void swap(sycl::queue &queue, int64_t n, sycl::buffer &x, int64_t incx, \ - sycl::buffer &y, int64_t incy) { \ + void swap(sycl::queue& queue, int64_t n, sycl::buffer& x, int64_t incx, \ + sycl::buffer& y, int64_t incy) { \ swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy); \ } @@ -1452,14 +1450,14 @@ SWAP_LAUNCHER(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER template -inline void iamin(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void iamin(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::iamin(func, queue, n, x, incx, result); } #define IAMIN_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void iamin(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) { \ + void iamin(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1471,14 +1469,14 @@ IAMIN_LAUNCHER(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER template -inline void nrm2(Func func, sycl::queue &queue, int64_t n, sycl::buffer &x, - const int64_t incx, sycl::buffer &result) { +inline void nrm2(Func func, sycl::queue& queue, int64_t n, sycl::buffer& x, + const int64_t incx, sycl::buffer& result) { column_major::nrm2(func, queue, n, x, incx, result); } #define NRM2_LAUNCHER(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - void nrm2(sycl::queue &queue, int64_t n, sycl::buffer &x, const int64_t incx, \ - sycl::buffer &result) 
{ \ + void nrm2(sycl::queue& queue, int64_t n, sycl::buffer& x, const int64_t incx, \ + sycl::buffer& result) { \ nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result); \ } @@ -1492,14 +1490,14 @@ NRM2_LAUNCHER(std::complex, double, rocblas_dznrm2) // USM APIs template -inline sycl::event asum(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event asum(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { return column_major::asum(func, queue, n, x, incx, result, dependencies); } #define ASUM_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event asum(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event asum(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return asum(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1511,14 +1509,14 @@ ASUM_LAUNCHER_USM(std::complex, double, rocblas_dzasum) #undef ASUM_LAUNCHER_USM template -inline sycl::event scal(Func func, sycl::queue &queue, int64_t n, T1 a, T2 *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event scal(Func func, sycl::queue& queue, int64_t n, T1 a, T2* x, int64_t incx, + const std::vector& dependencies) { return column_major::scal(func, queue, n, a, x, incx, dependencies); } #define SCAL_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event scal(sycl::queue &queue, int64_t n, TYPE1 a, TYPE2 *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event scal(sycl::queue& queue, int64_t n, TYPE1 a, TYPE2* x, int64_t incx, \ + const std::vector& dependencies) { \ return scal(ROCBLAS_ROUTINE, queue, n, a, x, incx, dependencies); \ } @@ -1532,14 +1530,14 @@ SCAL_LAUNCHER_USM(double, std::complex, rocblas_zdscal) #undef SCAL_LAUNCHER_USM template 
-inline sycl::event axpy(Func func, sycl::queue &queue, int64_t n, T alpha, const T *x, int64_t incx, - T *y, int64_t incy, const std::vector &dependencies) { +inline sycl::event axpy(Func func, sycl::queue& queue, int64_t n, T alpha, const T* x, int64_t incx, + T* y, int64_t incy, const std::vector& dependencies) { return column_major::axpy(func, queue, n, alpha, x, incx, y, incy, dependencies); } #define AXPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event axpy(sycl::queue &queue, int64_t n, TYPE alpha, const TYPE *x, int64_t incx, \ - TYPE *y, int64_t incy, const std::vector &dependencies) { \ + sycl::event axpy(sycl::queue& queue, int64_t n, TYPE alpha, const TYPE* x, int64_t incx, \ + TYPE* y, int64_t incy, const std::vector& dependencies) { \ return axpy(ROCBLAS_ROUTINE, queue, n, alpha, x, incx, y, incy, dependencies); \ } @@ -1550,38 +1548,38 @@ AXPY_LAUNCHER_USM(std::complex, rocblas_zaxpy) #undef AXPY_LAUNCHER_USM -sycl::event axpby(sycl::queue &queue, int64_t n, float alpha, const float *x, int64_t incx, - float beta, float *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, float alpha, const float* x, int64_t incx, + float beta, float* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, double alpha, const double *x, int64_t incx, - double beta, double *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, double alpha, const double* x, int64_t incx, + double beta, double* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, 
std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } -sycl::event axpby(sycl::queue &queue, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(sycl::queue& queue, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { throw unimplemented("blas", "axpby", "for row_major layout"); } template -inline sycl::event rotg(Func func, sycl::queue &queue, T1 *a, T1 *b, T2 *c, T1 *s, - const std::vector &dependencies) { +inline sycl::event rotg(Func func, sycl::queue& queue, T1* a, T1* b, T2* c, T1* s, + const std::vector& dependencies) { return column_major::rotg(func, queue, a, b, c, s, dependencies); } #define ROTG_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event rotg(sycl::queue &queue, TYPE1 *a, TYPE1 *b, TYPE2 *c, TYPE1 *s, \ - const std::vector &dependencies) { \ + sycl::event rotg(sycl::queue& queue, TYPE1* a, TYPE1* b, TYPE2* c, TYPE1* s, \ + const std::vector& dependencies) { \ return rotg(ROCBLAS_ROUTINE, queue, a, b, c, s, dependencies); \ } @@ -1593,14 +1591,14 @@ ROTG_LAUNCHER_USM(std::complex, double, rocblas_zrotg) #undef ROTG_LAUNCHER_USM template -inline sycl::event rotm(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, T *param, const std::vector &dependencies) { +inline sycl::event rotm(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, T* param, const std::vector& dependencies) { return column_major::rotm(func, queue, n, x, incx, y, incy, param, dependencies); } #define ROTM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotm(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, 
int64_t incy, \ - TYPE *param, const std::vector &dependencies) { \ + sycl::event rotm(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + TYPE* param, const std::vector& dependencies) { \ return rotm(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, param, dependencies); \ } @@ -1610,14 +1608,14 @@ ROTM_LAUNCHER_USM(double, rocblas_drotm) #undef ROTM_LAUNCHER_USM template -inline sycl::event copy(Func func, sycl::queue &queue, int64_t n, const T *x, int64_t incx, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event copy(Func func, sycl::queue& queue, int64_t n, const T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { return column_major::copy(func, queue, n, x, incx, y, incy, dependencies); } #define COPY_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event copy(sycl::queue &queue, int64_t n, const TYPE *x, int64_t incx, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event copy(sycl::queue& queue, int64_t n, const TYPE* x, int64_t incx, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return copy(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1629,16 +1627,16 @@ COPY_LAUNCHER_USM(std::complex, rocblas_zcopy) #undef COPY_LAUNCHER_USM template -inline sycl::event dot(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - const T *y, int64_t incy, T *result, - const std::vector &dependencies) { +inline sycl::event dot(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + const T* y, int64_t incy, T* result, + const std::vector& dependencies) { return column_major::dot(func, queue, n, x, incx, y, incy, result, dependencies); } #define DOT_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event dot##EXT(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - const TYPE *y, const int64_t incy, TYPE *result, \ - const std::vector &dependencies) { \ + sycl::event 
dot##EXT(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + const TYPE* y, const int64_t incy, TYPE* result, \ + const std::vector& dependencies) { \ return dot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, result, dependencies); \ } @@ -1651,21 +1649,21 @@ DOT_LAUNCHER_USM(c, std::complex, rocblas_zdotc) #undef DOT_LAUNCHER_USM -sycl::event dot(sycl::queue &queue, int64_t n, const float *x, int64_t incx, const float *y, - int64_t incy, double *result, const std::vector &dependencies) { +sycl::event dot(sycl::queue& queue, int64_t n, const float* x, int64_t incx, const float* y, + int64_t incy, double* result, const std::vector& dependencies) { throw unimplemented("blas", "dot", "for row_major layout"); } template -inline sycl::event rot(Func func, sycl::queue &queue, int64_t n, T1 *x, const int64_t incx, T1 *y, - int64_t incy, T2 c, T3 s, const std::vector &dependencies) { +inline sycl::event rot(Func func, sycl::queue& queue, int64_t n, T1* x, const int64_t incx, T1* y, + int64_t incy, T2 c, T3 s, const std::vector& dependencies) { return column_major::rot(func, queue, n, x, incx, y, incy, c, s, dependencies); } #define ROT_LAUNCHER_USM(TYPE1, TYPE2, TYPE3, ROCBLAS_ROUTINE) \ - sycl::event rot(sycl::queue &queue, int64_t n, TYPE1 *x, const int64_t incx, TYPE1 *y, \ + sycl::event rot(sycl::queue& queue, int64_t n, TYPE1* x, const int64_t incx, TYPE1* y, \ int64_t incy, TYPE2 c, TYPE3 s, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return rot(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, c, s, dependencies); \ } @@ -1676,21 +1674,21 @@ ROT_LAUNCHER_USM(std::complex, double, double, rocblas_zdrot) #undef ROT_LAUNCHER_USM -sycl::event sdsdot(sycl::queue &queue, int64_t n, float sb, const float *x, int64_t incx, - const float *y, int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event sdsdot(sycl::queue& queue, int64_t n, float sb, const float* x, int64_t incx, + const float* y, int64_t incy, 
float* result, + const std::vector& dependencies) { return column_major::sdsdot(queue, n, sb, x, incx, y, incy, result); } template -inline sycl::event rotmg(Func func, sycl::queue &queue, T *d1, T *d2, T *x1, T y1, T *param, - const std::vector &dependencies) { +inline sycl::event rotmg(Func func, sycl::queue& queue, T* d1, T* d2, T* x1, T y1, T* param, + const std::vector& dependencies) { return column_major::rotmg(func, queue, d1, d2, x1, y1, param, dependencies); } #define ROTMG_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event rotmg(sycl::queue &queue, TYPE *d1, TYPE *d2, TYPE *x1, TYPE y1, TYPE *param, \ - const std::vector &dependencies) { \ + sycl::event rotmg(sycl::queue& queue, TYPE* d1, TYPE* d2, TYPE* x1, TYPE y1, TYPE* param, \ + const std::vector& dependencies) { \ return rotmg(ROCBLAS_ROUTINE, queue, d1, d2, x1, y1, param, dependencies); \ } @@ -1700,14 +1698,14 @@ ROTMG_LAUNCHER_USM(double, rocblas_drotmg) #undef ROTMG_LAUNCHER_USM template -inline sycl::event iamax(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamax(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { return column_major::iamax(func, queue, n, x, incx, result, dependencies); } #define IAMAX_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamax(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamax(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamax(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1719,14 +1717,14 @@ IAMAX_LAUNCHER_USM(std::complex, rocblas_izamax) #undef IAMAX_LAUNCHER_USM template -inline sycl::event swap(Func func, sycl::queue &queue, int64_t n, T *x, int64_t incx, T *y, - int64_t incy, const 
std::vector &dependencies) { +inline sycl::event swap(Func func, sycl::queue& queue, int64_t n, T* x, int64_t incx, T* y, + int64_t incy, const std::vector& dependencies) { return column_major::swap(func, queue, n, x, incx, y, incy, dependencies); } #define SWAP_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event swap(sycl::queue &queue, int64_t n, TYPE *x, int64_t incx, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event swap(sycl::queue& queue, int64_t n, TYPE* x, int64_t incx, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return swap(ROCBLAS_ROUTINE, queue, n, x, incx, y, incy, dependencies); \ } @@ -1738,14 +1736,14 @@ SWAP_LAUNCHER_USM(std::complex, rocblas_zswap) #undef SWAP_LAUNCHER_USM template -inline sycl::event iamin(Func func, sycl::queue &queue, int64_t n, const T *x, const int64_t incx, - int64_t *result, const std::vector &dependencies) { +inline sycl::event iamin(Func func, sycl::queue& queue, int64_t n, const T* x, const int64_t incx, + int64_t* result, const std::vector& dependencies) { return column_major::iamin(func, queue, n, x, incx, result, dependencies); } #define IAMIN_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event iamin(sycl::queue &queue, int64_t n, const TYPE *x, const int64_t incx, \ - int64_t *result, const std::vector &dependencies) { \ + sycl::event iamin(sycl::queue& queue, int64_t n, const TYPE* x, const int64_t incx, \ + int64_t* result, const std::vector& dependencies) { \ return iamin(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } @@ -1757,14 +1755,14 @@ IAMIN_LAUNCHER_USM(std::complex, rocblas_izamin) #undef IAMIN_LAUNCHER_USM template -inline sycl::event nrm2(Func func, sycl::queue &queue, int64_t n, const T1 *x, const int64_t incx, - T2 *result, const std::vector &dependencies) { +inline sycl::event nrm2(Func func, sycl::queue& queue, int64_t n, const T1* x, const int64_t incx, + T2* result, const std::vector& dependencies) { return column_major::nrm2(func, 
queue, n, x, incx, result, dependencies); } #define NRM2_LAUNCHER_USM(TYPE1, TYPE2, ROCBLAS_ROUTINE) \ - sycl::event nrm2(sycl::queue &queue, int64_t n, const TYPE1 *x, const int64_t incx, \ - TYPE2 *result, const std::vector &dependencies) { \ + sycl::event nrm2(sycl::queue& queue, int64_t n, const TYPE1* x, const int64_t incx, \ + TYPE2* result, const std::vector& dependencies) { \ return nrm2(ROCBLAS_ROUTINE, queue, n, x, incx, result, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_level2.cpp b/src/blas/backends/rocblas/rocblas_level2.cpp index aaddd0d37..e0cc63183 100644 --- a/src/blas/backends/rocblas/rocblas_level2.cpp +++ b/src/blas/backends/rocblas/rocblas_level2.cpp @@ -28,7 +28,7 @@ // Helper Functions template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf, const int64_t len, const int64_t inc) { const auto abs_inc = std::abs(inc); auto acc = buf.template get_access(cgh); @@ -38,7 +38,7 @@ static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf, const i }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, const int64_t inc) { +static inline void conj_vector(sycl::handler& cgh, T* ptr, const int64_t len, const int64_t inc) { const auto abs_inc = std::abs(inc); cgh.parallel_for(sycl::range{ (std::size_t)len }, [=](sycl::id<1> id) { const auto index = id * abs_inc; @@ -47,7 +47,7 @@ static inline void conj_vector(sycl::handler &cgh, T *ptr, const int64_t len, co } template -static inline void conj_vector(sycl::handler &cgh, sycl::buffer &buf_a, sycl::buffer &buf_b, +static inline void conj_vector(sycl::handler& cgh, sycl::buffer& buf_a, sycl::buffer& buf_b, const int64_t len, const int64_t inc_a, const int64_t inc_b) { const auto abs_inc_a = std::abs(inc_a); const auto abs_inc_b = std::abs(inc_b); @@ -61,7 +61,7 @@ static inline void conj_vector(sycl::handler &cgh, 
sycl::buffer &buf_a, sycl: }); } template -static inline void conj_vector(sycl::handler &cgh, T *ptr_a, T *ptr_b, const int64_t len, +static inline void conj_vector(sycl::handler& cgh, T* ptr_a, T* ptr_b, const int64_t len, const int64_t inc_a, const int64_t inc_b) { const auto abs_inc_a = std::abs(inc_a); const auto abs_inc_b = std::abs(inc_b); @@ -82,34 +82,34 @@ namespace column_major { // Buffer APIs template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define GEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t 
incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -121,34 +121,34 @@ GEMV_LAUNCHER(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, kl, ku, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define GBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer 
&x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -160,32 +160,32 @@ GBMV_LAUNCHER(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER template -inline void ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +inline void ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; - rocblas_native_func(func, err, handle, m, n, (rocDataType *)&alpha, x_, incx, y_, - incy, a_, lda); + rocblas_native_func(func, err, handle, m, n, (rocDataType*)&alpha, x_, incx, y_, incy, + a_, lda); }); }); } #define GER_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, 
sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -200,34 +200,34 @@ GER_LAUNCHER(c, std::complex, rocblas_zgerc) #undef GER_LAUNCHER template -inline void hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define HBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& 
x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -237,34 +237,34 @@ HBMV_LAUNCHER(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER template -inline void hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define HEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ 
hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -274,31 +274,31 @@ HEMV_LAUNCHER(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER template -inline void her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, +inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_, lda); + (rocScalarType*)&alpha, x_, incx, a_, lda); }); }); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -309,33 +309,33 @@ HER_LAUNCHER(double, std::complex, rocblas_zher) #undef HER_LAUNCHER template -inline void her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, 
int64_t lda) { +inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define HER2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -345,34 +345,33 @@ HER2_LAUNCHER(std::complex, rocblas_zher2) #undef HER2_LAUNCHER template -inline void hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + 
sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); } #define HPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -382,30 +381,30 @@ HPMV_LAUNCHER(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER template -inline void hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { 
+ queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_); + (rocScalarType*)&alpha, x_, incx, a_); }); }); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -415,33 +414,33 @@ HPR_LAUNCHER(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER template -inline void hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ 
= sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define HPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -451,34 +450,34 @@ HPR2_LAUNCHER(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status 
err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define SBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -488,34 +487,34 @@ SBMV_LAUNCHER(double, rocblas_dsbmv) #undef SBMV_LAUNCHER template -inline void symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, 
get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); } #define SYMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -525,29 +524,29 @@ SYMV_LAUNCHER(double, rocblas_dsymv) #undef SYMV_LAUNCHER template -inline void syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_, lda); + (rocDataType*)&alpha, x_, incx, a_, lda); }); }); } #define SYR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - 
sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -560,33 +559,33 @@ SYR_LAUNCHER(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER template -inline void syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); } #define SYR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + 
sycl::buffer& a, int64_t lda) { \ syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -599,34 +598,33 @@ SYR2_LAUNCHER(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER template -inline void spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); } #define SPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -636,29 +634,29 @@ 
SPMV_LAUNCHER(double, rocblas_dspmv) #undef SPMV_LAUNCHER template -inline void spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_); + (rocDataType*)&alpha, x_, incx, a_); }); }); } #define SPR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -668,33 +666,33 @@ SPR_LAUNCHER(double, rocblas_dspr) #undef SPR_LAUNCHER template -inline void spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - 
queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); auto y_acc = y.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); - auto y_ = sc.get_mem(y_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); + auto y_ = sc.get_mem(y_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); } #define SPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -704,31 +702,31 @@ SPR2_LAUNCHER(double, rocblas_dspr2) #undef SPR2_LAUNCHER template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, 
[=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); } #define TBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -741,31 +739,31 @@ TBMV_LAUNCHER(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + 
auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); } #define TBSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -778,30 +776,30 @@ TBSV_LAUNCHER(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER template -inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + 
get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); } #define TPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -813,30 +811,30 @@ TPMV_LAUNCHER(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); } #define TPSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& 
queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -848,31 +846,31 @@ TPSV_LAUNCHER(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); } #define TRMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -884,31 +882,31 @@ TRMV_LAUNCHER(std::complex, 
rocblas_ztrmv) #undef TRMV_LAUNCHER template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto x_acc = x.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto x_ = sc.get_mem(x_acc); + auto a_ = sc.get_mem(a_acc); + auto x_ = sc.get_mem(x_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); } #define TRSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -922,24 +920,24 @@ TRSV_LAUNCHER(std::complex, rocblas_ztrsv) // USM APIs template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { 
+inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -947,9 +945,9 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -962,25 +960,25 @@ GEMV_LAUNCHER_USM(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t 
m, int64_t n, - int64_t kl, int64_t ku, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, kl, ku, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(trans), m, n, kl, ku, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -988,10 +986,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return 
gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1004,23 +1002,23 @@ GBMV_LAUNCHER_USM(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, const T *x, - int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, const T* x, + int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, m, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; - rocblas_native_func(func, err, handle, m, n, (rocDataType *)&alpha, x_, incx, y_, - incy, a_, lda); + rocblas_native_func(func, err, handle, m, n, (rocDataType*)&alpha, x_, incx, y_, incy, + a_, lda); }); }); @@ -1028,9 +1026,9 @@ inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T al } #define GER_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const 
std::vector& dependencies) { \ return ger(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); \ } @@ -1044,24 +1042,24 @@ GER_LAUNCHER_USM(c, std::complex, rocblas_zgerc) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1069,9 +1067,9 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, 
int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1082,24 +1080,24 @@ HBMV_LAUNCHER_USM(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1107,9 +1105,9 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, 
TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -1120,23 +1118,23 @@ HEMV_LAUNCHER_USM(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + int64_t lda, const std::vector& dependencies) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_, lda); + (rocScalarType*)&alpha, x_, incx, a_, lda); }); }); @@ -1144,9 +1142,9 @@ inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, 
int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -1156,23 +1154,23 @@ HER_LAUNCHER_USM(double, std::complex, rocblas_zher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1180,9 +1178,9 @@ inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const 
TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -1193,24 +1191,23 @@ HER2_LAUNCHER_USM(std::complex, rocblas_zher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); @@ -1218,9 +1215,9 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, 
const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -1231,23 +1228,23 @@ HPMV_LAUNCHER_USM(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - const std::vector &dependencies) { +inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + const std::vector& dependencies) { using rocScalarType = typename RocEquivalentType::Type; using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocScalarType *)&alpha, x_, incx, a_); + (rocScalarType*)&alpha, x_, incx, a_); }); }); @@ -1255,9 +1252,9 @@ inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, 
int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -1267,23 +1264,23 @@ HPR_LAUNCHER_USM(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1291,9 +1288,9 @@ inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ 
+ sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -1304,24 +1301,24 @@ HPR2_LAUNCHER_USM(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, k, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1329,9 +1326,9 @@ inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, 
int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -1342,24 +1339,24 @@ SBMV_LAUNCHER_USM(double, rocblas_dsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, lda, x_, incx, (rocDataType *)&beta, - y_, incy); + (rocDataType*)&alpha, a_, lda, x_, incx, (rocDataType*)&beta, y_, + incy); }); }); @@ -1367,9 +1364,9 @@ inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYMV_LAUNCHER_USM(TYPE, 
ROCBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -1380,22 +1377,22 @@ SYMV_LAUNCHER_USM(double, rocblas_dsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_, lda); + (rocDataType*)&alpha, x_, incx, a_, lda); }); }); @@ -1403,9 +1400,9 @@ inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE 
*x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -1418,23 +1415,23 @@ SYR_LAUNCHER_USM(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_, lda); + (rocDataType*)&alpha, x_, incx, y_, incy, a_, lda); }); }); @@ -1442,9 +1439,9 @@ inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE 
*a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -1458,24 +1455,23 @@ SYR2_LAUNCHER_USM(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, a_, x_, incx, (rocDataType *)&beta, y_, - incy); + (rocDataType*)&alpha, a_, x_, incx, (rocDataType*)&beta, y_, incy); }); }); @@ -1483,9 +1479,9 @@ inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE 
*x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -1496,21 +1492,21 @@ SPMV_LAUNCHER_USM(double, rocblas_dspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, a_); + (rocDataType*)&alpha, x_, incx, a_); }); }); @@ -1518,8 +1514,8 @@ inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const 
std::vector& dependencies) { \ return spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -1529,23 +1525,23 @@ SPR_LAUNCHER_USM(double, rocblas_dspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx, incy); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); - auto y_ = reinterpret_cast(y); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); + auto y_ = reinterpret_cast(y); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), n, - (rocDataType *)&alpha, x_, incx, y_, incy, a_); + (rocDataType*)&alpha, x_, incx, y_, incy, a_); }); }); @@ -1553,9 +1549,9 @@ inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(ROCBLAS_ROUTINE, queue, 
upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -1566,23 +1562,23 @@ SPR2_LAUNCHER_USM(double, rocblas_dspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); @@ -1590,9 +1586,9 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ 
return tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -1605,23 +1601,23 @@ TBMV_LAUNCHER_USM(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, k, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + k, a_, lda, x_, incx); }); }); @@ -1629,9 +1625,9 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, 
int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -1644,23 +1640,23 @@ TBSV_LAUNCHER_USM(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); @@ -1668,9 +1664,9 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return 
tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -1683,23 +1679,23 @@ TPMV_LAUNCHER_USM(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, x_, incx); }); }); @@ -1707,9 +1703,9 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, 
\ dependencies); \ } @@ -1722,23 +1718,23 @@ TPSV_LAUNCHER_USM(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); @@ -1746,9 +1742,9 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ 
dependencies); \ } @@ -1761,23 +1757,23 @@ TRMV_LAUNCHER_USM(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, lda, incx); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto x_ = reinterpret_cast(x); + auto a_ = reinterpret_cast(a); + auto x_ = reinterpret_cast(x); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - n, a_, lda, x_, incx); + get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), n, + a_, lda, x_, incx); }); }); @@ -1785,9 +1781,9 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ 
dependencies); \ } @@ -1806,10 +1802,10 @@ namespace row_major { // Buffer APIs template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx, std::complex beta, - sycl::buffer, 1> &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx, std::complex beta, + sycl::buffer, 1>& y, int64_t incy) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1818,10 +1814,10 @@ inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 beta = std::conj(beta); if (m > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx); }); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -1830,15 +1826,15 @@ inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } template -inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1846,9 +1842,9 @@ inline void gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 } #define GEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1860,10 +1856,10 @@ GEMV_LAUNCHER(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, std::complex alpha, sycl::buffer, 1> &a, - int64_t lda, sycl::buffer, 1> &x, int64_t incx, - std::complex beta, sycl::buffer, 1> &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, std::complex alpha, sycl::buffer, 1>& a, + int64_t lda, sycl::buffer, 1>& x, int64_t incx, + std::complex beta, sycl::buffer, 1>& y, int64_t incy) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1872,10 +1868,10 @@ inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 beta = std::conj(beta); if (m > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, m, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, m, incx); }); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -1884,15 +1880,15 @@ inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } template -inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, - int64_t ku, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, - int64_t incx, T beta, sycl::buffer &y, int64_t incy) { +inline void gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, + int64_t ku, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, + int64_t incx, T beta, sycl::buffer& y, int64_t incy) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -1900,9 +1896,9 @@ inline void gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int6 } #define GBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ - int64_t incx, TYPE beta, sycl::buffer &y, int64_t incy) { \ + void gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, int64_t ku, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ + int64_t incx, TYPE beta, sycl::buffer& y, int64_t incy) { \ gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1914,35 +1910,35 @@ GBMV_LAUNCHER(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER template -inline void gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { +inline void gerc(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda); } template -inline void geru(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - sycl::buffer, 1> &x, int64_t incx, - sycl::buffer, 1> &y, int64_t incy, - sycl::buffer, 1> &a, int64_t lda) { +inline void geru(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + sycl::buffer, 1>& x, int64_t incx, + sycl::buffer, 1>& y, int64_t incy, + sycl::buffer, 1>& a, int64_t lda) { column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda); } template 
-inline void ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, sycl::buffer &x, - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, +inline void ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, sycl::buffer& x, + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, int64_t lda) { column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda); } #define GER_LAUNCHER(EXT, TYPE, ROCBLAS_ROUTINE) \ - void ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer &x, \ - int64_t incx, sycl::buffer &y, int64_t incy, sycl::buffer &a, \ + void ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, sycl::buffer& x, \ + int64_t incx, sycl::buffer& y, int64_t incy, sycl::buffer& a, \ int64_t lda) { \ ger##EXT(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda); \ } @@ -1957,29 +1953,29 @@ GER_LAUNCHER(c, std::complex, rocblas_zgeru) #undef GER_LAUNCHER template -inline void hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hbmv(func, queue, new_uplo, n, k, new_alpha, a, lda, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -1989,29 +1985,29 @@ HBMV_LAUNCHER(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER template -inline void hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hemv(func, queue, new_uplo, n, new_alpha, a, lda, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HEMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2021,22 +2017,22 @@ HEMV_LAUNCHER(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER template -inline void her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, +inline void her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } column_major::her(func, queue, new_uplo, n, alpha, x, incx, a, lda); } #define HER_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void her(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, \ + void her(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, \ int64_t lda) { \ her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2047,23 +2043,23 @@ HER_LAUNCHER(double, std::complex, rocblas_zher) #undef HER_LAUNCHER template -inline void her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::her2(func, queue, new_uplo, n, alpha, y, incy, x, incx, a, lda); } #define HER2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -2073,29 +2069,29 @@ HER2_LAUNCHER(std::complex, rocblas_zher2) #undef HER2_LAUNCHER template -inline void hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_alpha = std::conj(alpha); auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hpmv(func, queue, new_uplo, n, new_alpha, a, x, incx, new_beta, y, incy); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } #define HPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -2105,21 +2101,21 @@ HPMV_LAUNCHER(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER template -inline void hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, ScalarType alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, ScalarType alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } column_major::hpr(func, queue, new_uplo, n, alpha, x, incx, a); } #define HPR_LAUNCHER(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - void hpr(sycl::queue &queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void hpr(sycl::queue& queue, uplo upper_lower, int64_t n, SCALAR_TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2129,23 +2125,23 @@ HPR_LAUNCHER(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER template -inline void hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, y, n, incx, incy); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, y, n, incx, incy); }); } column_major::hpr2(func, queue, new_uplo, n, alpha, y, incy, x, incx, a); } #define HPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2155,9 +2151,9 @@ HPR2_LAUNCHER(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER template -inline void sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2165,9 +2161,9 @@ inline void sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int } #define SBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2177,9 +2173,9 @@ SBMV_LAUNCHER(double, rocblas_dsbmv) #undef SBMV_LAUNCHER template -inline void symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2187,9 +2183,9 @@ inline void symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T a } #define SYMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx, \ - TYPE beta, sycl::buffer &y, int64_t incy) { \ + void symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx, \ + TYPE beta, sycl::buffer& y, int64_t incy) { \ symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); \ } @@ -2199,8 +2195,8 @@ SYMV_LAUNCHER(double, rocblas_dsymv) #undef SYMV_LAUNCHER template -inline void syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { +inline void syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2208,8 +2204,8 @@ inline void syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T al } #define SYR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a, int64_t lda) { \ + void syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a, int64_t lda) { \ syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda); \ } @@ -2222,9 +2218,9 @@ SYR_LAUNCHER(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER template -inline void syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a, int64_t lda) { +inline void syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a, int64_t lda) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2232,9 +2228,9 @@ inline void syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T a } #define SYR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a, int64_t lda) { \ + void syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a, int64_t lda) { \ syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); \ } @@ -2247,9 +2243,9 @@ SYR2_LAUNCHER(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER template -inline void spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &a, sycl::buffer &x, int64_t incx, T beta, - sycl::buffer &y, int64_t incy) { +inline void spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& a, sycl::buffer& x, int64_t incx, T beta, + sycl::buffer& y, int64_t incy) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2257,9 +2253,9 @@ inline void spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T a } #define SPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx, TYPE beta, \ - sycl::buffer &y, int64_t incy) { \ + void spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx, TYPE beta, \ + sycl::buffer& y, int64_t incy) { \ spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); \ } @@ -2269,8 +2265,8 @@ SPMV_LAUNCHER(double, rocblas_dspmv) #undef SPMV_LAUNCHER template -inline void spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &a) { +inline void spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2278,8 +2274,8 @@ inline void spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T al } #define SPR_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &a) { \ + void spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& a) { \ spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a); \ } @@ -2289,9 +2285,9 @@ SPR_LAUNCHER(double, rocblas_dspr) #undef SPR_LAUNCHER template -inline void spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, - sycl::buffer &a) { +inline void spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, + sycl::buffer& a) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2299,9 +2295,9 @@ inline void spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T a } #define SPR2_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, \ - sycl::buffer &x, int64_t incx, sycl::buffer &y, int64_t incy, \ - sycl::buffer &a) { \ + void spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, \ + sycl::buffer& x, int64_t incx, sycl::buffer& y, int64_t incy, \ + sycl::buffer& a) { \ spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a); \ } @@ -2311,9 +2307,9 @@ SPR2_LAUNCHER(double, rocblas_dspr2) #undef SPR2_LAUNCHER template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2321,7 +2317,7 @@ inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2329,14 +2325,14 @@ inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2347,8 +2343,8 @@ inline void tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TBMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -2361,9 +2357,9 @@ TBMV_LAUNCHER(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2371,7 +2367,7 @@ inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2379,14 +2375,14 @@ inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2397,8 +2393,8 @@ inline void tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TBSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - int64_t k, sycl::buffer &a, int64_t lda, sycl::buffer &x, \ + void tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + int64_t k, sycl::buffer& a, int64_t lda, sycl::buffer& x, \ int64_t incx) { \ tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); \ } @@ -2411,9 +2407,9 @@ TBSV_LAUNCHER(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER template -inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2421,7 +2417,7 @@ inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2429,14 +2425,14 @@ inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2446,8 +2442,8 @@ inline void tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TPMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -2459,9 +2455,9 @@ TPMV_LAUNCHER(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2469,7 +2465,7 @@ inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2477,14 +2473,14 @@ inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, sycl::buffer &x, int64_t incx) { +inline void tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2494,8 +2490,8 @@ inline void tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TPSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, sycl::buffer &x, int64_t incx) { \ + void tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, sycl::buffer& x, int64_t incx) { \ tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx); \ } @@ -2507,9 +2503,9 @@ TPSV_LAUNCHER(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2517,7 +2513,7 @@ inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2525,14 +2521,14 @@ inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2543,8 +2539,8 @@ inline void trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TRMV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -2556,9 +2552,9 @@ TRMV_LAUNCHER(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer, 1> &a, int64_t lda, - sycl::buffer, 1> &x, int64_t incx) { +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer, 1>& a, int64_t lda, + sycl::buffer, 1>& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2566,7 +2562,7 @@ inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } @@ -2574,14 +2570,14 @@ inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }); + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }); } } } template -inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, - int64_t n, sycl::buffer &a, int64_t lda, sycl::buffer &x, +inline void trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, + int64_t n, sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -2592,8 +2588,8 @@ inline void trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define TRSV_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ - sycl::buffer &a, int64_t lda, sycl::buffer &x, int64_t incx) { \ + void trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, int64_t n, \ + sycl::buffer& a, int64_t lda, sycl::buffer& x, int64_t incx) { \ trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); \ } @@ -2607,11 +2603,11 @@ TRSV_LAUNCHER(std::complex, rocblas_ztrsv) // USM APIs template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2623,10 +2619,10 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t if (m > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)x, m, incx); }); + [&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)x, m, incx); }); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -2638,7 +2634,7 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2649,9 +2645,9 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t } template -inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event gemv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -2660,9 +2656,9 @@ inline sycl::event gemv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event gemv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return gemv(ROCBLAS_ROUTINE, queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -2675,11 +2671,11 @@ GEMV_LAUNCHER_USM(std::complex, rocblas_zgemv) #undef GEMV_LAUNCHER_USM template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, std::complex alpha, const std::complex *a, - int64_t lda, const std::complex *x, int64_t incx, std::complex beta, - std::complex *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, std::complex alpha, const std::complex* a, + int64_t lda, const std::complex* x, int64_t incx, std::complex beta, + std::complex* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -2691,10 +2687,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t if (m > 0) { done = queue.submit( - [&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)x, m, incx); }); + [&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)x, m, incx); }); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, y, n, incy); }); + done = queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, y, n, incy); }); } } } @@ -2706,7 +2702,7 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2717,10 +2713,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t } template -inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t m, int64_t n, - int64_t kl, int64_t ku, T alpha, const T *a, int64_t lda, const T *x, - int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event gbmv(Func func, sycl::queue& queue, transpose trans, int64_t m, int64_t n, + int64_t kl, int64_t ku, T alpha, const T* a, int64_t lda, const T* x, + int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans : oneapi::mkl::transpose::nontrans; @@ -2729,10 +2725,10 @@ inline sycl::event gbmv(Func func, sycl::queue &queue, transpose trans, int64_t } #define GBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gbmv(sycl::queue &queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ - int64_t ku, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *x, \ - int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event gbmv(sycl::queue& queue, transpose trans, int64_t m, int64_t n, int64_t kl, \ + int64_t ku, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* x, \ + int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return gbmv(ROCBLAS_ROUTINE, queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -2745,12 +2741,12 @@ GBMV_LAUNCHER_USM(std::complex, rocblas_zgbmv) #undef GBMV_LAUNCHER_USM template -inline sycl::event gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event gerc(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (std::complex *)y, n, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (std::complex*)y, n, incy); }) .wait_and_throw(); } @@ -2758,24 +2754,24 @@ inline sycl::event gerc(Func func, sycl::queue &queue, int64_t m, int64_t n, std } template -inline sycl::event geru(Func func, sycl::queue &queue, int64_t m, int64_t n, std::complex alpha, - const std::complex *x, int64_t incx, const std::complex *y, - int64_t incy, std::complex *a, int64_t lda, - const std::vector 
&dependencies) { +inline sycl::event geru(Func func, sycl::queue& queue, int64_t m, int64_t n, std::complex alpha, + const std::complex* x, int64_t incx, const std::complex* y, + int64_t incy, std::complex* a, int64_t lda, + const std::vector& dependencies) { return column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda, dependencies); } template -inline sycl::event ger(Func func, sycl::queue &queue, int64_t m, int64_t n, T alpha, const T *x, - int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event ger(Func func, sycl::queue& queue, int64_t m, int64_t n, T alpha, const T* x, + int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { return column_major::ger(func, queue, n, m, alpha, y, incy, x, incx, a, lda, dependencies); } #define GER_LAUNCHER_USM(EXT, TYPE, ROCBLAS_ROUTINE) \ - sycl::event ger##EXT(sycl::queue &queue, int64_t m, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event ger##EXT(sycl::queue& queue, int64_t m, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return ger##EXT(ROCBLAS_ROUTINE, queue, m, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2790,9 +2786,9 @@ GER_LAUNCHER_USM(c, std::complex, rocblas_zgeru) #undef GER_LAUNCHER_USM template -inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { sycl::event done; auto new_uplo = 
upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper @@ -2801,7 +2797,7 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2809,7 +2805,7 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t incy, dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2819,9 +2815,9 @@ inline sycl::event hbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event hbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return hbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -2832,9 +2828,9 @@ HBMV_LAUNCHER_USM(std::complex, rocblas_zhbmv) #undef HBMV_LAUNCHER_USM template -inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event hemv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower 
== oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper @@ -2843,7 +2839,7 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2851,7 +2847,7 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t incy, dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2861,9 +2857,9 @@ inline sycl::event hemv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HEMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hemv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hemv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -2874,14 +2870,14 @@ HEMV_LAUNCHER_USM(std::complex, rocblas_zhemv) #undef HEMV_LAUNCHER_USM template -inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - int64_t lda, const std::vector &dependencies) { +inline sycl::event her(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + int64_t lda, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (DataType *)x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); }) .wait_and_throw(); } @@ -2889,9 +2885,9 @@ inline sycl::event her(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -2901,14 +2897,14 @@ HER_LAUNCHER_USM(double, std::complex, rocblas_zher) #undef HER_LAUNCHER_USM template -inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event her2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, (T *)y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); }) .wait_and_throw(); } @@ -2917,9 +2913,9 @@ inline sycl::event her2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HER2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event her2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return her2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -2930,9 +2926,9 @@ HER2_LAUNCHER_USM(std::complex, rocblas_zher2) #undef HER2_LAUNCHER_USM template -inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event hpmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -2941,7 +2937,7 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t auto new_beta = std::conj(beta); if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, y, n, incx, incy); }) .wait_and_throw(); } @@ -2949,7 +2945,7 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t dependencies); if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, y, n, incy); }); @@ -2959,9 +2955,9 @@ inline sycl::event hpmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event hpmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return hpmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -2972,14 +2968,14 @@ HPMV_LAUNCHER_USM(std::complex, rocblas_zhpmv) #undef HPMV_LAUNCHER_USM template -inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, - const ScalarType alpha, const DataType *x, int64_t incx, DataType *a, - const std::vector &dependencies) { +inline sycl::event hpr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, + const ScalarType alpha, const DataType* x, int64_t incx, DataType* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (DataType *)x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (DataType*)x, n, incx); }) .wait_and_throw(); } @@ -2987,9 +2983,9 @@ inline sycl::event hpr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR_LAUNCHER_USM(SCALAR_TYPE, DATA_TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr(sycl::queue &queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ - const DATA_TYPE *x, int64_t incx, DATA_TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr(sycl::queue& queue, uplo upper_lower, int64_t n, const SCALAR_TYPE alpha, \ + const DATA_TYPE* x, int64_t incx, DATA_TYPE* a, \ + const std::vector& dependencies) { \ return hpr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -2999,14 +2995,14 @@ HPR_LAUNCHER_USM(double, std::complex, rocblas_zhpr) #undef HPR_LAUNCHER_USM template -inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event hpr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, (T *)x, (T *)y, n, incx, incy); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, (T*)x, (T*)y, n, incx, incy); }) .wait_and_throw(); } @@ -3014,9 +3010,9 @@ inline sycl::event hpr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define HPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hpr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event hpr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return hpr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -3027,9 +3023,9 @@ HPR2_LAUNCHER_USM(std::complex, rocblas_zhpr2) #undef HPR2_LAUNCHER_USM template -inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, - T alpha, const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event sbmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, + T alpha, const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3038,9 +3034,9 @@ inline sycl::event sbmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event sbmv(sycl::queue &queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ - const TYPE *a, int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, \ - int64_t incy, const std::vector &dependencies) { \ + sycl::event sbmv(sycl::queue& queue, uplo upper_lower, int64_t n, int64_t k, TYPE alpha, \ + const TYPE* a, int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, \ + int64_t incy, const std::vector& dependencies) { \ return sbmv(ROCBLAS_ROUTINE, queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, \ incy, dependencies); \ } @@ -3051,9 +3047,9 @@ SBMV_LAUNCHER_USM(double, rocblas_dsbmv) #undef SBMV_LAUNCHER_USM template -inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, int64_t lda, const T *x, int64_t incx, T beta, T *y, - int64_t incy, const std::vector &dependencies) { +inline sycl::event symv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, int64_t lda, const T* x, int64_t incx, T beta, T* y, + int64_t incy, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3062,9 +3058,9 @@ inline sycl::event symv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - int64_t lda, const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event symv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + int64_t lda, const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return symv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, \ dependencies); \ } @@ -3075,9 +3071,9 @@ SYMV_LAUNCHER_USM(double, rocblas_dsymv) #undef SYMV_LAUNCHER_USM template -inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3085,9 +3081,9 @@ inline sycl::event syr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); \ } @@ -3100,9 +3096,9 @@ SYR_LAUNCHER_USM(std::complex, rocblas_zsyr) #undef SYR_LAUNCHER_USM template -inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, int64_t lda, - const std::vector &dependencies) { +inline sycl::event syr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, int64_t lda, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3111,9 +3107,9 @@ inline sycl::event syr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SYR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, int64_t lda, \ - const std::vector &dependencies) { \ + sycl::event syr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, int64_t lda, \ + const std::vector& dependencies) { \ return syr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, \ dependencies); \ } @@ -3127,9 +3123,9 @@ SYR2_LAUNCHER_USM(std::complex, rocblas_zsyr2) #undef SYR2_LAUNCHER_USM template -inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *a, const T *x, int64_t incx, T beta, T *y, int64_t incy, - const std::vector &dependencies) { +inline sycl::event spmv(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* a, const T* x, int64_t incx, T beta, T* y, int64_t incy, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3138,9 +3134,9 @@ inline sycl::event spmv(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spmv(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *a, \ - const TYPE *x, int64_t incx, TYPE beta, TYPE *y, int64_t incy, \ - const std::vector &dependencies) { \ + sycl::event spmv(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* a, \ + const TYPE* x, int64_t incx, TYPE beta, TYPE* y, int64_t incy, \ + const std::vector& dependencies) { \ return spmv(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, \ dependencies); \ } @@ -3151,9 +3147,9 @@ SPMV_LAUNCHER_USM(double, rocblas_dspmv) #undef SPMV_LAUNCHER_USM template -inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, T *a, - const std::vector &dependencies) { +inline sycl::event spr(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3161,8 +3157,8 @@ inline sycl::event spr(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, TYPE *a, const std::vector &dependencies) { \ + sycl::event spr(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, TYPE* a, const std::vector& dependencies) { \ return spr(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, a, dependencies); \ } @@ -3172,9 +3168,9 @@ SPR_LAUNCHER_USM(double, rocblas_dspr) #undef SPR_LAUNCHER_USM template -inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t n, T alpha, - const T *x, int64_t incx, const T *y, int64_t incy, T *a, - const std::vector &dependencies) { +inline sycl::event spr2(Func func, sycl::queue& queue, uplo upper_lower, int64_t n, T alpha, + const T* x, int64_t incx, const T* y, int64_t incy, T* a, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; @@ -3182,9 +3178,9 @@ inline sycl::event spr2(Func func, sycl::queue &queue, uplo upper_lower, int64_t } #define SPR2_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event spr2(sycl::queue &queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE *x, \ - int64_t incx, const TYPE *y, int64_t incy, TYPE *a, \ - const std::vector &dependencies) { \ + sycl::event spr2(sycl::queue& queue, uplo upper_lower, int64_t n, TYPE alpha, const TYPE* x, \ + int64_t incx, const TYPE* y, int64_t incy, TYPE* a, \ + const std::vector& dependencies) { \ return spr2(ROCBLAS_ROUTINE, queue, upper_lower, n, alpha, x, incx, y, incy, a, \ dependencies); \ } @@ -3195,10 +3191,10 @@ SPR2_LAUNCHER_USM(double, rocblas_dspr2) #undef SPR2_LAUNCHER_USM template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3208,7 +3204,7 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3218,7 +3214,7 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3229,9 +3225,9 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3242,9 +3238,9 @@ inline sycl::event tbmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -3257,10 +3253,10 @@ TBMV_LAUNCHER_USM(std::complex, rocblas_ztbmv) #undef TBMV_LAUNCHER_USM template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3270,7 +3266,7 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3280,7 +3276,7 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3291,9 +3287,9 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, int64_t k, const T *a, int64_t lda, T *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tbsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, int64_t k, const T* a, int64_t lda, T* x, + int64_t incx, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3304,9 +3300,9 @@ inline sycl::event tbsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TBSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tbsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, int64_t k, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tbsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, int64_t k, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tbsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, \ dependencies); \ } @@ -3319,9 +3315,9 @@ TBSV_LAUNCHER_USM(std::complex, rocblas_ztbsv) #undef TBSV_LAUNCHER_USM template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3331,7 +3327,7 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3343,7 +3339,7 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (n > 0) { incx = std::abs(incx); - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3354,9 +3350,9 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3367,9 +3363,9 @@ inline sycl::event tpmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -3382,9 +3378,9 @@ TPMV_LAUNCHER_USM(std::complex, rocblas_ztpmv) #undef TPMV_LAUNCHER_USM template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, std::complex *x, - int64_t incx, const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, std::complex* x, + int64_t incx, const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3394,7 +3390,7 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3406,7 +3402,7 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (n > 0) { incx = std::abs(incx); - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3417,9 +3413,9 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event tpsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3430,9 +3426,9 @@ inline sycl::event tpsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TPSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event tpsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event tpsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return tpsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, x, incx, \ dependencies); \ } @@ -3445,10 +3441,10 @@ TPSV_LAUNCHER_USM(std::complex, rocblas_ztpsv) #undef TPSV_LAUNCHER_USM template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3458,7 +3454,7 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3468,7 +3464,7 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3479,9 +3475,9 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trmv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3492,9 +3488,9 @@ inline sycl::event trmv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRMV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trmv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trmv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } @@ -3507,10 +3503,10 @@ TRMV_LAUNCHER_USM(std::complex, rocblas_ztrmv) #undef TRMV_LAUNCHER_USM template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const std::complex *a, int64_t lda, - std::complex *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const std::complex* a, int64_t lda, + std::complex* x, int64_t incx, + const std::vector& dependencies) { sycl::event done; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -3520,7 +3516,7 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - queue.submit([&](sycl::handler &cgh) { conj_vector(cgh, x, n, incx); }) + queue.submit([&](sycl::handler& cgh) { conj_vector(cgh, x, n, incx); }) .wait_and_throw(); } } @@ -3530,7 +3526,7 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo if (trans == oneapi::mkl::transpose::conjtrans) { if (n > 0) { - done = queue.submit([&](sycl::handler &cgh) { + done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); conj_vector(cgh, x, n, incx); }); @@ -3541,9 +3537,9 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } template -inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, int64_t n, const T *a, int64_t lda, T *x, int64_t incx, - const std::vector &dependencies) { +inline sycl::event trsv(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, int64_t n, const T* a, int64_t lda, T* x, int64_t incx, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -3554,9 +3550,9 @@ inline sycl::event trsv(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define TRSV_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsv(sycl::queue &queue, uplo upper_lower, transpose trans, diag unit_diag, \ - int64_t n, const TYPE *a, int64_t lda, TYPE *x, int64_t incx, \ - const std::vector &dependencies) { \ + sycl::event trsv(sycl::queue& queue, uplo upper_lower, transpose trans, diag unit_diag, \ + int64_t n, const TYPE* a, int64_t lda, TYPE* x, int64_t incx, \ + const std::vector& dependencies) { \ return trsv(ROCBLAS_ROUTINE, queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, \ dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_level3.cpp b/src/blas/backends/rocblas/rocblas_level3.cpp index 04f67ef5a..fe0dc4090 100644 --- a/src/blas/backends/rocblas/rocblas_level3.cpp +++ b/src/blas/backends/rocblas/rocblas_level3.cpp @@ -34,34 +34,34 @@ namespace column_major { // Buffer APIs template -inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = 
sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(transb), m, n, k, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define GEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ @@ -77,9 +77,9 @@ GEMM_LAUNCHER(std::complex, rocblas_zgemm) template inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, COMPUTETYPE CT, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, T_S alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T_S beta, sycl::buffer &c, + sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, T_S alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T_S beta, sycl::buffer& c, int64_t ldc) { using rocDataType_A = typename RocEquivalentType::Type; using rocDataType_B = typename RocEquivalentType::Type; @@ -87,30 +87,30 @@ inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C using rocDataType_S = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType_S *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (rocDataType_S *)&beta, c_, DT_C, - ldc, c_, DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); + get_rocblas_operation(transb), m, n, k, (rocDataType_S*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (rocDataType_S*)&beta, c_, DT_C, ldc, c_, + DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); }); }); } #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_S beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_S beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE, \ queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -130,34 +130,34 @@ GEMM_EX_LAUNCHER(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocblas_d #undef GEMM_EX_LAUNCHER template -inline void symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, 
int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define SYMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -170,34 +170,34 @@ SYMM_LAUNCHER(std::complex, 
rocblas_zsymm) #undef SYMM_LAUNCHER template -inline void hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define HEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ hemm(ROCBLAS_ROUTINE, 
queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -208,32 +208,32 @@ HEMM_LAUNCHER(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER template -inline void syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, T beta, - sycl::buffer &c, int64_t ldc) { +inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, T beta, + sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + (rocDataType*)&beta, c_, ldc); }); }); } #define SYRK_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -245,33 +245,33 @@ SYRK_LAUNCHER(std::complex, 
rocblas_zsyrk) #undef SYRK_LAUNCHER template -inline void herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, ScalarType alpha, sycl::buffer &a, int64_t lda, - ScalarType beta, sycl::buffer &c, int64_t ldc) { +inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, ScalarType alpha, sycl::buffer& a, int64_t lda, + ScalarType beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocScalarType *)&alpha, a_, - lda, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocScalarType*)&alpha, a_, lda, + (rocScalarType*)&beta, c_, ldc); }); }); } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -281,34 +281,34 @@ HERK_LAUNCHER(std::complex, 
double, rocblas_zherk) #undef HERK_LAUNCHER template -inline void syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); } #define SYR2K_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(ROCBLAS_ROUTINE, 
queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -321,37 +321,37 @@ SYR2K_LAUNCHER(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, DataType alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, ScalarType beta, - sycl::buffer &c, int64_t ldc) { +inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, DataType alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, ScalarType beta, + sycl::buffer& c, int64_t ldc) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocScalarType*)&beta, c_, ldc); }); }); } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, 
int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -366,26 +366,26 @@ HER2K_LAUNCHER(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; #if ROCBLAS_VERSION_MAJOR >= 4 rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); #else 
rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), get_rocblas_fill_mode(upper_lower), @@ -397,9 +397,9 @@ inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define TRMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -412,33 +412,33 @@ TRMM_LAUNCHER(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER template -inline void trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - 
get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb); }); }); } #define TRSM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -453,25 +453,25 @@ TRSM_LAUNCHER(std::complex, rocblas_ztrsm) // USM APIs template -inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto 
a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(transb), m, n, k, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -479,10 +479,10 @@ inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpo } #define GEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, \ c, ldc, dependencies); \ } @@ -497,29 +497,29 @@ GEMM_LAUNCHER_USM(std::complex, rocblas_zgemm) template inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - COMPUTETYPE CT, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T_S alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_S beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + COMPUTETYPE CT, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T_S alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_S beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType_A = typename RocEquivalentType::Type; using rocDataType_B = typename RocEquivalentType::Type; using rocDataType_C 
= typename RocEquivalentType::Type; using rocDataType_S = typename RocEquivalentType::Type; overflow_check(m, n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_operation(transa), - get_rocblas_operation(transb), m, n, k, (rocDataType_S *)&alpha, - a_, DT_A, lda, b_, DT_B, ldb, (rocDataType_S *)&beta, c_, DT_C, - ldc, c_, DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); + get_rocblas_operation(transb), m, n, k, (rocDataType_S*)&alpha, a_, + DT_A, lda, b_, DT_B, ldb, (rocDataType_S*)&beta, c_, DT_C, ldc, c_, + DT_C, ldc, CT, rocblas_gemm_algo_standard, 0, 0); }); }); @@ -528,10 +528,10 @@ inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_S beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_S beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, 
ROCMDATATYPE_C, \ ROCMCOMPUTETYPE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ @@ -551,24 +551,24 @@ GEMM_EX_LAUNCHER_USM(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocbl #undef GEMM_EX_LAUNCHER_USM template -inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -576,10 +576,10 @@ inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upp } #define SYMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const 
TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -592,24 +592,24 @@ SYMM_LAUNCHER_USM(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), m, n, (rocDataType *)&alpha, - a_, lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_fill_mode(upper_lower), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, (rocDataType*)&beta, c_, ldc); 
}); }); @@ -617,10 +617,10 @@ inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upp } #define HEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -631,23 +631,23 @@ HEMM_LAUNCHER_USM(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, const T *a, int64_t lda, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, const T* a, int64_t lda, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, 
(rocDataType *)&alpha, a_, - lda, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + (rocDataType*)&beta, c_, ldc); }); }); @@ -655,9 +655,9 @@ inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define SYRK_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -670,25 +670,25 @@ SYRK_LAUNCHER_USM(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, const ScalarType alpha, const DataType *a, int64_t lda, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, const ScalarType alpha, const DataType* a, int64_t lda, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - 
auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocScalarType *)&alpha, a_, - lda, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocScalarType*)&alpha, a_, lda, + (rocScalarType*)&beta, c_, ldc); }); }); @@ -696,10 +696,10 @@ inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -710,25 +710,25 @@ HERK_LAUNCHER_USM(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *b, - int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* b, + int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { 
cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocDataType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocDataType*)&beta, c_, ldc); }); }); @@ -736,10 +736,10 @@ inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define SYR2K_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -752,26 +752,26 @@ SYR2K_LAUNCHER_USM(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const DataType alpha, const DataType *a, int64_t lda, - const DataType *b, int64_t ldb, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { +inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose 
trans, + int64_t n, int64_t k, const DataType alpha, const DataType* a, int64_t lda, + const DataType* b, int64_t ldb, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; using rocScalarType = typename RocEquivalentType::Type; overflow_check(n, k, lda, ldb, ldc); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto c_ = reinterpret_cast(c); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), n, k, (rocDataType *)&alpha, a_, - lda, b_, ldb, (rocScalarType *)&beta, c_, ldc); + get_rocblas_operation(trans), n, k, (rocDataType*)&alpha, a_, lda, + b_, ldb, (rocScalarType*)&beta, c_, ldc); }); }); @@ -779,10 +779,10 @@ inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(ROCBLAS_ROUTINE, queue, 
upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -797,26 +797,26 @@ HER2K_LAUNCHER_USM(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; #if ROCBLAS_VERSION_MAJOR >= 4 rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType *)&alpha, a_, lda, b_, ldb, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb, b_, ldb); #else rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), get_rocblas_fill_mode(upper_lower), @@ -830,9 +830,9 @@ inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upp } #define 
TRMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -845,25 +845,25 @@ TRMM_LAUNCHER_USM(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { using rocDataType = typename RocEquivalentType::Type; overflow_check(m, n, lda, ldb); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler &sc) { + onemkl_rocblas_host_task(cgh, queue, [=](RocblasScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocblas_native_func(func, err, handle, get_rocblas_side_mode(left_right), - get_rocblas_fill_mode(upper_lower), - get_rocblas_operation(trans), get_rocblas_diag_type(unit_diag), - m, n, (rocDataType 
*)&alpha, a_, lda, b_, ldb); + get_rocblas_fill_mode(upper_lower), get_rocblas_operation(trans), + get_rocblas_diag_type(unit_diag), m, n, (rocDataType*)&alpha, a_, + lda, b_, ldb); }); }); @@ -871,9 +871,9 @@ inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upp } #define TRSM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -892,9 +892,9 @@ namespace row_major { // Buffer APIs template -inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, int64_t m, - int64_t n, int64_t k, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, int64_t m, + int64_t n, int64_t k, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_transa = transb; auto new_transb = transa; @@ -903,9 +903,9 @@ inline void gemm(Func func, sycl::queue &queue, transpose transa, transpose tran } #define GEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE 
alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE beta, sycl::buffer& c, \ int64_t ldc) { \ gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ @@ -921,9 +921,9 @@ GEMM_LAUNCHER(std::complex, rocblas_zgemm) template inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, COMPUTETYPE CT, - sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, - int64_t k, T_S alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, T_S beta, sycl::buffer &c, + sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, + int64_t k, T_S alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, T_S beta, sycl::buffer& c, int64_t ldc) { auto new_transa = transb; auto new_transb = transa; @@ -934,9 +934,9 @@ inline void gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C #define GEMM_EX_LAUNCHER(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - void gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, TYPE_S beta, sycl::buffer &c, \ + void gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, TYPE_S beta, sycl::buffer& c, \ int64_t ldc) { \ gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE, \ queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); \ @@ -956,9 +956,9 @@ GEMM_EX_LAUNCHER(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocblas_d #undef GEMM_EX_LAUNCHER template -inline void symm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t 
ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper @@ -968,9 +968,9 @@ inline void symm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define SYMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -983,9 +983,9 @@ SYMM_LAUNCHER(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER template -inline void hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -995,9 +995,9 @@ inline void hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define HEMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, \ c, ldc); \ } @@ -1008,9 +1008,9 @@ HEMM_LAUNCHER(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER template -inline void syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, T beta, - sycl::buffer &c, int64_t ldc) { +inline void syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, T beta, + sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -1020,9 +1020,9 @@ inline void syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define SYRK_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -1034,9 +1034,9 @@ SYRK_LAUNCHER(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER template -inline void herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, ScalarType alpha, sycl::buffer &a, int64_t lda, - ScalarType beta, sycl::buffer &c, int64_t ldc) { +inline void herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, ScalarType alpha, sycl::buffer& a, int64_t lda, + ScalarType beta, sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::conjtrans @@ -1046,9 +1046,9 @@ inline void herk(Func func, sycl::queue &queue, uplo upper_lower, transpose tran } #define HERK_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - SCALAR_TYPE alpha, sycl::buffer &a, int64_t lda, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + SCALAR_TYPE alpha, sycl::buffer& a, int64_t lda, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); \ } @@ -1058,9 +1058,9 @@ HERK_LAUNCHER(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER template -inline void syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, - int64_t ldb, T beta, sycl::buffer &c, int64_t ldc) { +inline void syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, + int64_t ldb, T beta, sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -1071,9 +1071,9 @@ inline void syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose tra } #define SYR2K_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, sycl::buffer &a, int64_t lda, sycl::buffer &b, \ - int64_t ldb, TYPE beta, sycl::buffer &c, int64_t ldc) { \ + void syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, sycl::buffer& a, int64_t lda, sycl::buffer& b, \ + int64_t ldb, TYPE beta, sycl::buffer& c, int64_t ldc) { \ syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -1086,10 +1086,10 @@ SYR2K_LAUNCHER(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER template -inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, DataType alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb, ScalarType beta, - sycl::buffer &c, int64_t ldc) { +inline void her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, DataType alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb, ScalarType beta, + sycl::buffer& c, int64_t ldc) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::conjtrans @@ -1101,10 +1101,10 @@ inline void her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose tra } #define HER2K_LAUNCHER(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - void her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - DATA_TYPE alpha, sycl::buffer &a, int64_t lda, \ - sycl::buffer &b, int64_t ldb, SCALAR_TYPE beta, \ - sycl::buffer &c, int64_t ldc) { \ + void her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + DATA_TYPE alpha, sycl::buffer& a, int64_t lda, \ + sycl::buffer& b, int64_t ldb, SCALAR_TYPE beta, \ + sycl::buffer& c, int64_t ldc) { \ her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, \ ldc); \ } @@ -1119,9 +1119,9 @@ HER2K_LAUNCHER(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -1132,9 +1132,9 @@ inline void trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define TRMM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -1147,9 +1147,9 @@ TRMM_LAUNCHER(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER template -inline void trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, - diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer &a, int64_t lda, - sycl::buffer &b, int64_t ldb) { +inline void trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, + diag unit_diag, int64_t m, int64_t n, T alpha, sycl::buffer& a, int64_t lda, + sycl::buffer& b, int64_t ldb) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -1160,9 +1160,9 @@ inline void trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lowe } #define TRSM_LAUNCHER(TYPE, ROCBLAS_ROUTINE) \ - void trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer &a, \ - int64_t lda, sycl::buffer &b, int64_t ldb) { \ + void trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, sycl::buffer& a, \ + int64_t lda, sycl::buffer& b, int64_t ldb) { \ trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, \ lda, b, ldb); \ } @@ -1177,10 +1177,10 @@ TRSM_LAUNCHER(std::complex, rocblas_ztrsm) // USM APIs template -inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T alpha, const T *a, int64_t lda, - const T *b, int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event gemm(Func func, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T alpha, const T* a, int64_t lda, + const T* b, int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -1189,10 +1189,10 @@ inline sycl::event gemm(Func func, sycl::queue &queue, transpose transa, transpo } #define GEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, \ - int64_t ldb, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, \ + int64_t ldb, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& 
dependencies) { \ return gemm(ROCBLAS_ROUTINE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, \ c, ldc, dependencies); \ } @@ -1207,10 +1207,10 @@ GEMM_LAUNCHER_USM(std::complex, rocblas_zgemm) template inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE_C DT_C, - COMPUTETYPE CT, sycl::queue &queue, transpose transa, transpose transb, - int64_t m, int64_t n, int64_t k, T_S alpha, const T_A *a, int64_t lda, - const T_B *b, int64_t ldb, T_S beta, T_C *c, int64_t ldc, - const std::vector &dependencies) { + COMPUTETYPE CT, sycl::queue& queue, transpose transa, transpose transb, + int64_t m, int64_t n, int64_t k, T_S alpha, const T_A* a, int64_t lda, + const T_B* b, int64_t ldb, T_S beta, T_C* c, int64_t ldc, + const std::vector& dependencies) { auto new_transa = transb; auto new_transb = transa; @@ -1220,10 +1220,10 @@ inline sycl::event gemm_ex(Func func, DATATYPE_A DT_A, DATATYPE_B DT_B, DATATYPE #define GEMM_EX_LAUNCHER_USM(TYPE_A, TYPE_B, TYPE_C, TYPE_S, ROCBLAS_ROUTINE, ROCMDATATYPE_A, \ ROCMDATATYPE_B, ROCMDATATYPE_C, ROCMCOMPUTETYPE) \ - sycl::event gemm(sycl::queue &queue, transpose transa, transpose transb, int64_t m, int64_t n, \ - int64_t k, TYPE_S alpha, const TYPE_A *a, int64_t lda, const TYPE_B *b, \ - int64_t ldb, TYPE_S beta, TYPE_C *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event gemm(sycl::queue& queue, transpose transa, transpose transb, int64_t m, int64_t n, \ + int64_t k, TYPE_S alpha, const TYPE_A* a, int64_t lda, const TYPE_B* b, \ + int64_t ldb, TYPE_S beta, TYPE_C* c, int64_t ldc, \ + const std::vector& dependencies) { \ return gemm_ex(ROCBLAS_ROUTINE, ROCMDATATYPE_A, ROCMDATATYPE_B, ROCMDATATYPE_C, \ ROCMCOMPUTETYPE, queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ @@ -1243,9 +1243,9 @@ GEMM_EX_LAUNCHER_USM(bfloat16, bfloat16, bfloat16, float, rocblas_gemm_ex, rocbl #undef GEMM_EX_LAUNCHER_USM template -inline sycl::event symm(Func func, 
sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event symm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper @@ -1256,10 +1256,10 @@ inline sycl::event symm(Func func, sycl::queue &queue, side left_right, uplo upp } #define SYMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event symm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event symm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return symm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1272,9 +1272,9 @@ SYMM_LAUNCHER_USM(std::complex, rocblas_zsymm) #undef SYMM_LAUNCHER_USM template -inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, - int64_t n, T alpha, const T *a, int64_t lda, const T *b, int64_t ldb, - T beta, T *c, int64_t ldc, const std::vector &dependencies) { +inline sycl::event hemm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, + int64_t n, T alpha, const T* a, int64_t lda, const T* b, int64_t ldb, + T beta, T* c, int64_t ldc, const std::vector& dependencies) { auto new_side 
= left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper @@ -1285,10 +1285,10 @@ inline sycl::event hemm(Func func, sycl::queue &queue, side left_right, uplo upp } #define HEMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event hemm(sycl::queue &queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event hemm(sycl::queue& queue, side left_right, uplo upper_lower, int64_t m, int64_t n, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return hemm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1299,9 +1299,9 @@ HEMM_LAUNCHER_USM(std::complex, rocblas_zhemm) #undef HEMM_LAUNCHER_USM template -inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, T alpha, const T *a, int64_t lda, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syrk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, T alpha, const T* a, int64_t lda, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -1312,9 +1312,9 @@ inline sycl::event syrk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define SYRK_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syrk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syrk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syrk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -1327,10 +1327,10 @@ SYRK_LAUNCHER_USM(std::complex, rocblas_zsyrk) #undef SYRK_LAUNCHER_USM template -inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, - int64_t k, const ScalarType alpha, const DataType *a, int64_t lda, - const ScalarType beta, DataType *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event herk(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, + int64_t k, const ScalarType alpha, const DataType* a, int64_t lda, + const ScalarType beta, DataType* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::conjtrans @@ -1341,10 +1341,10 @@ inline sycl::event herk(Func func, sycl::queue &queue, uplo upper_lower, transpo } #define HERK_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event herk(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const SCALAR_TYPE alpha, const DATA_TYPE *a, int64_t lda, \ - const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event herk(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const SCALAR_TYPE alpha, const DATA_TYPE* a, int64_t lda, \ + const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return herk(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, \ dependencies); \ } @@ -1355,10 +1355,10 @@ HERK_LAUNCHER_USM(std::complex, double, rocblas_zherk) #undef HERK_LAUNCHER_USM template -inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, T alpha, const T *a, int64_t lda, const T *b, - int64_t ldb, T beta, T *c, int64_t ldc, - const std::vector &dependencies) { +inline sycl::event syr2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, T alpha, const T* a, int64_t lda, const T* b, + int64_t ldb, T beta, T* c, int64_t ldc, + const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::trans @@ -1369,10 +1369,10 @@ inline sycl::event syr2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define SYR2K_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event syr2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - TYPE alpha, const TYPE *a, int64_t lda, const TYPE *b, int64_t ldb, \ - TYPE beta, TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event syr2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + TYPE alpha, const TYPE* a, int64_t lda, const TYPE* b, int64_t ldb, \ + TYPE beta, TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return syr2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1385,10 +1385,10 @@ SYR2K_LAUNCHER_USM(std::complex, rocblas_zsyr2k) #undef SYR2K_LAUNCHER_USM template -inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transpose trans, - int64_t n, int64_t k, const DataType alpha, const DataType *a, int64_t lda, - const DataType *b, int64_t ldb, const ScalarType beta, DataType *c, - int64_t ldc, const std::vector &dependencies) { +inline sycl::event her2k(Func func, sycl::queue& queue, uplo upper_lower, transpose trans, + int64_t n, int64_t k, const DataType alpha, const DataType* a, int64_t lda, + const DataType* b, int64_t ldb, const ScalarType beta, DataType* c, + int64_t ldc, const std::vector& dependencies) { auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? oneapi::mkl::uplo::upper : oneapi::mkl::uplo::lower; auto new_trans = trans == oneapi::mkl::transpose::nontrans ? 
oneapi::mkl::transpose::conjtrans @@ -1400,10 +1400,10 @@ inline sycl::event her2k(Func func, sycl::queue &queue, uplo upper_lower, transp } #define HER2K_LAUNCHER_USM(DATA_TYPE, SCALAR_TYPE, ROCBLAS_ROUTINE) \ - sycl::event her2k(sycl::queue &queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ - const DATA_TYPE alpha, const DATA_TYPE *a, int64_t lda, const DATA_TYPE *b, \ - int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE *c, int64_t ldc, \ - const std::vector &dependencies) { \ + sycl::event her2k(sycl::queue& queue, uplo upper_lower, transpose trans, int64_t n, int64_t k, \ + const DATA_TYPE alpha, const DATA_TYPE* a, int64_t lda, const DATA_TYPE* b, \ + int64_t ldb, const SCALAR_TYPE beta, DATA_TYPE* c, int64_t ldc, \ + const std::vector& dependencies) { \ return her2k(ROCBLAS_ROUTINE, queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, \ beta, c, ldc, dependencies); \ } @@ -1418,10 +1418,10 @@ HER2K_LAUNCHER_USM(std::complex, double, rocblas_zher2k) // separated from the B matrix. It is possible to use B instead of C, but this // will slow-down the code. template -inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trmm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -1432,9 +1432,9 @@ inline sycl::event trmm(Func func, sycl::queue &queue, side left_right, uplo upp } #define TRMM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trmm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trmm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trmm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } @@ -1447,10 +1447,10 @@ TRMM_LAUNCHER_USM(std::complex, rocblas_ztrmm) #undef TRMM_LAUNCHER_USM template -inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upper_lower, - transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T *a, - int64_t lda, T *b, int64_t ldb, - const std::vector &dependencies) { +inline sycl::event trsm(Func func, sycl::queue& queue, side left_right, uplo upper_lower, + transpose trans, diag unit_diag, int64_t m, int64_t n, T alpha, const T* a, + int64_t lda, T* b, int64_t ldb, + const std::vector& dependencies) { auto new_side = left_right == oneapi::mkl::side::left ? oneapi::mkl::side::right : oneapi::mkl::side::left; auto new_uplo = upper_lower == oneapi::mkl::uplo::lower ? 
oneapi::mkl::uplo::upper @@ -1461,9 +1461,9 @@ inline sycl::event trsm(Func func, sycl::queue &queue, side left_right, uplo upp } #define TRSM_LAUNCHER_USM(TYPE, ROCBLAS_ROUTINE) \ - sycl::event trsm(sycl::queue &queue, side left_right, uplo upper_lower, transpose trans, \ - diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE *a, int64_t lda, \ - TYPE *b, int64_t ldb, const std::vector &dependencies) { \ + sycl::event trsm(sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, \ + diag unit_diag, int64_t m, int64_t n, TYPE alpha, const TYPE* a, int64_t lda, \ + TYPE* b, int64_t ldb, const std::vector& dependencies) { \ return trsm(ROCBLAS_ROUTINE, queue, left_right, upper_lower, trans, unit_diag, m, n, \ alpha, a, lda, b, ldb, dependencies); \ } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.cpp b/src/blas/backends/rocblas/rocblas_scope_handle.cpp index 8f48aed3d..9574271b4 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle.cpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle.cpp @@ -26,7 +26,7 @@ namespace rocblas { template rocblas_handle_container::~rocblas_handle_container() noexcept(false) { - for (auto &handle_pair : rocblas_handle_container_mapper_) { + for (auto& handle_pair : rocblas_handle_container_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); @@ -59,7 +59,7 @@ thread_local rocblas_handle_container RocblasScopedContextHandler::h #endif RocblasScopedContextHandler::RocblasScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : interop_h(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -89,8 +89,8 @@ RocblasScopedContextHandler::~RocblasScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = 
static_cast*>(userData); if (!ptr) { return; } @@ -108,7 +108,7 @@ void ContextCallback(void *userData) { } } -rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue& queue) { auto hipDevice = interop_h.get_native_device(); hipError_t hipErr; hipCtx_t desired; @@ -154,10 +154,10 @@ rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue &queue) { +hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context RocblasScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context RocblasScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle.hpp b/src/blas/backends/rocblas/rocblas_scope_handle.hpp index 76c849ef8..7b2438a54 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle.hpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle.hpp @@ -45,29 +45,29 @@ namespace rocblas { template struct rocblas_handle_container { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_container_mapper_{}; ~rocblas_handle_container() noexcept(false); }; class RocblasScopedContextHandler { HIPcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &interop_h; + sycl::interop_handle& interop_h; #ifdef ONEMKL_PI_INTERFACE_REMOVED static thread_local rocblas_handle_container handle_helper; #else static thread_local rocblas_handle_container handle_helper; #endif - sycl::context get_context(const sycl::queue &queue); - hipStream_t get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue& queue); + hipStream_t 
get_stream(const sycl::queue& queue); public: - RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~RocblasScopedContextHandler() noexcept(false); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp index da9791411..64d883b52 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.cpp @@ -27,7 +27,7 @@ namespace blas { namespace rocblas { rocblas_handle_container::~rocblas_handle_container() noexcept(false) { - for (auto &handle_pair : rocblas_handle_mapper_) { + for (auto& handle_pair : rocblas_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); @@ -46,10 +46,10 @@ thread_local rocblas_handle_container RocblasScopedContextHandler::handle_helper rocblas_handle_container{}; RocblasScopedContextHandler::RocblasScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : interop_h(ih) {} -rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue& queue) { sycl::device device = queue.get_device(); int current_device = interop_h.get_native_device(); hipStream_t streamId = get_stream(queue); @@ -84,7 +84,7 @@ rocblas_handle RocblasScopedContextHandler::get_handle(const sycl::queue &queue) return handle; } -hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue &queue) { +hipStream_t RocblasScopedContextHandler::get_stream(const sycl::queue& queue) 
{ return interop_h.get_native_queue(); } diff --git a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp index 3c156ab6c..07d0d8292 100644 --- a/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp +++ b/src/blas/backends/rocblas/rocblas_scope_handle_hipsycl.hpp @@ -35,7 +35,7 @@ namespace blas { namespace rocblas { struct rocblas_handle_container { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocblas_handle_mapper_{}; ~rocblas_handle_container() noexcept(false); }; @@ -43,13 +43,13 @@ struct rocblas_handle_container { class RocblasScopedContextHandler { sycl::interop_handle interop_h; static thread_local rocblas_handle_container handle_helper; - sycl::context get_context(const sycl::queue &queue); - hipStream_t get_stream(const sycl::queue &queue); + sycl::context get_context(const sycl::queue& queue); + hipStream_t get_stream(const sycl::queue& queue); public: - RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + RocblasScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
diff --git a/src/blas/backends/rocblas/rocblas_task.hpp b/src/blas/backends/rocblas/rocblas_task.hpp index 3b872e536..a855d9a72 100644 --- a/src/blas/backends/rocblas/rocblas_task.hpp +++ b/src/blas/backends/rocblas/rocblas_task.hpp @@ -53,7 +53,7 @@ namespace rocblas { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { cgh.hipSYCL_enqueue_custom_operation([f, queue](sycl::interop_handle ih) { auto sc = RocblasScopedContextHandler(queue, ih); f(sc); @@ -61,9 +61,9 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #else template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -73,7 +73,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } #endif template -static inline void onemkl_rocblas_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemkl_rocblas_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/blas/blas_loader.cpp b/src/blas/blas_loader.cpp index 1c3cfcb71..e2276c0fe 100644 --- a/src/blas/blas_loader.cpp +++ b/src/blas/blas_loader.cpp @@ -32,3962 +32,3962 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_scasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t 
n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dzasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_sasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dasum_sycl(queue, n, x, incx, result); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, 
std::int64_t incy) { +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, 
incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_scopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dcopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& 
queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ccopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zcopy_sycl(queue, n, x, incx, y, incy); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_scopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_sdot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_ddot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, 
std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].column_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_isamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_idamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_icamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_izamin_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_isamax_sycl(queue, n, x, incx, result); 
} -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_idamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_icamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_izamax_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_scnrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dznrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, 
sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_snrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].column_major_dnrm2_sycl(queue, n, x, incx, result); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].column_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].column_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].column_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].column_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].column_major_srotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].column_major_drotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].column_major_crotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].column_major_zrotg_sycl(queue, a, b, c, s); } -void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { 
+void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_srotm_sycl(queue, n, x, incx, y, incy, param); } -void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_drotm_sycl(queue, n, x, incx, y, incy, param); } -void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } -void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].column_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_sscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_cscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_csscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_zscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_zdscal_sycl(queue, n, alpha, x, incx); } -void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { +void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { 
function_tables[{ libkey, queue }].column_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zswap_sycl(queue, n, x, incx, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +void 
gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t 
lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + 
std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, 
sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, 
sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, 
sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, 
+ sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue 
}].column_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + 
sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, 
sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].column_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void 
spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr2(oneapi::mkl::device libkey, sycl::queue 
&queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].column_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, 
sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].column_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + 
std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].column_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t 
n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 
1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + 
sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue 
}].column_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, 
std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_strsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void 
trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].column_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double 
alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { 
+ sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + 
sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { 
function_tables[{ libkey, queue }].column_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cher2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zher2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssymm_sycl(queue, 
left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsymm_sycl(queue, 
left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, 
std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, 
transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_ssyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, 
std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dsyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_csyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zsyr2k_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void 
trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_strmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dtrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ctrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ztrmm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_strsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dtrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ctrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, 
sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_ztrsm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size) { function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, 
sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t 
stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue 
}].column_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + 
std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_sgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_dgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> 
&a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_cgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void 
gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + 
sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, 
transpose trans, std::int64_t m, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, 
sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + 
sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t 
m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, 
sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].column_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose 
trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].column_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_somatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_domatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, 
beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_comatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].column_major_zomatadd_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const 
std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dzasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - float *alpha, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, 
std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, 
std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_saxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_daxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_caxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zaxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - 
std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_batch_group_usm_sycl( 
queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ccopy_batch_strided_usm_sycl( queue, n, x, 
incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, 
double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsdot_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdotc_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdotc_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].column_major_cdotu_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdotu_usm_sycl( queue, n, x, incx, y, incy, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_isamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_idamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* 
result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_icamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_izamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_isamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_idamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, 
queue }].column_major_icamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_izamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_scnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dznrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_snrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event 
nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); 
} -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex* 
a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotm_usm_sycl( queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_drotm_usm_sycl( queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t 
n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { +sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdsdot_usm_sycl( queue, n, sb, x, incx, y, incy, result, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, 
std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const 
std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, 
transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, float *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, 
std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, 
x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose 
*trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - 
std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, 
std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ddgmm_batch_group_usm_sycl( queue, 
left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float 
*a, std::int64_t lda, const std::vector &dependencies) { +sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - 
std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const 
std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_zher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, 
std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_zhpr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { 
return function_tables[{ libkey, queue }].column_major_dspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dspr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, 
const std::vector &dependencies) { +sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies) { 
+sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t 
incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, 
std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, 
std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].column_major_stpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, 
sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_stpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, 
sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + 
std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_dtrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, 
std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].column_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, 
const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event 
her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].column_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo 
upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, 
group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - 
std::int64_t *lda, std::complex *beta, std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, 
std::int64_t stride_a, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event 
syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + 
std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t 
lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, 
std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag 
unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, 
- uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex 
**b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t 
*group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, 
const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - 
std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, 
- std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, 
std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - 
std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, 
std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, 
std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, - 
float *c, std::int64_t ldc, const std::vector &dependencies) { + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_sgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
+sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, 
const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t 
ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, 
stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t 
stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event 
omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatcopy(oneapi::mkl::device 
libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, const std::vector &dependencies) { +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const 
std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, 
std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_simatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { 
+sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + 
std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, 
ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event 
imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device 
libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].column_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } @@ -4001,3962 +4001,3962 @@ static oneapi::mkl::detail::table_initializer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_scasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dzasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sasum_sycl(queue, n, x, incx, result); } -void asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void asum(oneapi::mkl::device libkey, sycl::queue& 
queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dasum_sycl(queue, n, x, incx, result); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_saxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_daxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_caxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zaxpy_sycl(queue, n, alpha, x, incx, y, incy); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, 
std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, std::int64_t stridex, sycl::buffer, 1> &y, +void axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, 
std::int64_t stridex, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_saxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_daxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_caxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 
1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zaxpby_sycl(queue, n, alpha, x, incx, beta, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_scopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dcopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ccopy_sycl(queue, n, x, incx, y, incy); } -void copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zcopy_sycl(queue, n, x, incx, y, incy); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + 
sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_scopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, +void copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size); } -void 
dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sdot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_ddot_sycl(queue, n, x, incx, y, incy, result); } -void dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result) { +void dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dsdot_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_cdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void 
dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_zdotc_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_cdotu_sycl(queue, n, x, incx, y, incy, result); } -void dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result) { +void dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result) { function_tables[{ libkey, queue }].row_major_zdotu_sycl(queue, n, x, incx, y, incy, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_isamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_idamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_icamin_sycl(queue, n, x, incx, result); } -void iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_izamin_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_isamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_idamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_icamax_sycl(queue, n, x, incx, result); } -void iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void iamax(oneapi::mkl::device libkey, 
sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_izamax_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_scnrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dznrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_snrm2_sycl(queue, n, x, incx, result); } -void nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &result) { +void nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_dnrm2_sycl(queue, n, x, incx, result); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, float s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float 
c, float s) { function_tables[{ libkey, queue }].row_major_srot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, double c, double s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].row_major_drot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, float c, float s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s) { function_tables[{ libkey, queue }].row_major_csrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, double c, double s) { +void rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s) { function_tables[{ libkey, queue }].row_major_zdrot_sycl(queue, n, x, incx, y, incy, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, sycl::buffer& s) { function_tables[{ libkey, queue }].row_major_srotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, sycl::buffer &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& 
c, sycl::buffer& s) { function_tables[{ libkey, queue }].row_major_drotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].row_major_crotg_sycl(queue, a, b, c, s); } -void rotg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s) { +void rotg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s) { function_tables[{ libkey, queue }].row_major_zrotg_sycl(queue, a, b, c, s); } -void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m) { +void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_srotm_sycl(queue, n, x, incx, y, incy, param); } -void rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m) { +void rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_drotm_sycl(queue, n, x, incx, y, incy, param); } -void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer ¶m) { +void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + 
sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_srotmg_sycl(queue, d1, d2, x1, y1, param); } -void rotmg(oneapi::mkl::device libkey, sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, double y1, - sycl::buffer ¶m) { +void rotmg(oneapi::mkl::device libkey, sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, double y1, + sycl::buffer& param) { function_tables[{ libkey, queue }].row_major_drotmg_sycl(queue, d1, d2, x1, y1, param); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_sscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_cscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_csscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float 
alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_zscal_sycl(queue, n, alpha, x, incx); } -void scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx) { +void scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_zdscal_sycl(queue, n, alpha, x, incx); } -void sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &result) { +void sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& result) { function_tables[{ libkey, queue }].row_major_sdsdot_sycl(queue, n, sb, x, incx, y, incy, result); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, sycl::buffer &y, +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, 
std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cswap_sycl(queue, n, x, incx, y, incy); } -void swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy) { +void swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zswap_sycl(queue, n, x, incx, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy) { +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void 
gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zgbmv_sycl(queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose 
trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_cgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zgemv_sycl(queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer 
&a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, float beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, double beta, sycl::buffer &y, std::int64_t incy, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &x, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, std::complex beta, - 
sycl::buffer, 1> &y, std::int64_t incy, std::int64_t stridey, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, 
std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, std::int64_t stridec, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, std::int64_t m, - std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer, 1> &c, std::int64_t ldc, +void dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, std::int64_t m, + std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, 
sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size); } -void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_sger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_dger_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - 
std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zgerc_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zgeru_sycl(queue, m, n, alpha, x, incx, y, incy, a, lda); } -void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo 
upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, std::int64_t incy) { +void hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, 
+ std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhemv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda) { +void her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zher_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_cher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, 
std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda) { +void her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_zher2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_chpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy) { +void hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, + sycl::buffer, 1>& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_zhpmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { 
function_tables[{ libkey, queue }].row_major_chpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a) { +void hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_zhpr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_chpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a) { +void hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a) { function_tables[{ libkey, queue }].row_major_zhpr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, sycl::buffer &y, +void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float 
alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ssbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, sycl::buffer &y, +void sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dsbmv_sycl(queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); } -void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_sspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy) { +void spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dspmv_sycl(queue, upper_lower, n, alpha, a, x, incx, beta, y, incy); } -void spr(oneapi::mkl::device libkey, sycl::queue 
&queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_sspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a) { +void spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_dspr_sycl(queue, upper_lower, n, alpha, x, incx, a); } -void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_sspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a) { +void spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a) { function_tables[{ libkey, queue }].row_major_dspr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a); } -void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - 
std::int64_t incx, float beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_ssymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, sycl::buffer &x, - std::int64_t incx, double beta, sycl::buffer &y, std::int64_t incy) { +void symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, sycl::buffer& x, + std::int64_t incx, double beta, sycl::buffer& y, std::int64_t incy) { function_tables[{ libkey, queue }].row_major_dsymv_sycl(queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); } -void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_ssyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &a, +void syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_dsyr_sycl(queue, upper_lower, n, alpha, x, incx, a, lda); } -void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - 
float alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_ssyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, std::int64_t lda) { +void syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda) { function_tables[{ libkey, queue }].row_major_dsyr2_sycl(queue, upper_lower, n, alpha, x, incx, y, incy, a, lda); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, 
std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztbmv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, 
sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztbsv_sycl(queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stpmv_sycl(queue, upper_lower, trans, 
unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztpmv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_stpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, 
uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, sycl::buffer &x, +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx) { +void tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztpsv_sycl(queue, upper_lower, trans, unit_diag, n, a, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_strmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztrmv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_strsv_sycl(queue, 
upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_dtrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ctrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, std::int64_t incx) { +void trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx) { function_tables[{ libkey, queue }].row_major_ztrsv_sycl(queue, upper_lower, trans, unit_diag, n, a, lda, x, incx); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, 
transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_sgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::half beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::half beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_hgemm_sycl(queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemm(oneapi::mkl::device libkey, 
sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_chemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zhemm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1> &a, - std::int64_t lda, float beta, sycl::buffer, 1> &c, std::int64_t ldc) { +void herk(oneapi::mkl::device libkey, sycl::queue& queue, 
uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer, 1>& a, + std::int64_t lda, float beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1> &a, - std::int64_t lda, double beta, sycl::buffer, 1> &c, +void herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer, 1>& a, + std::int64_t lda, double beta, sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zherk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, float beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, float beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, double beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, double beta, + 
sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zher2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t 
ldc) { function_tables[{ libkey, queue }].row_major_csymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsymm_sycl(queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &c, std::int64_t ldc) { +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, 
sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_csyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsyrk_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer &c, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, 
std::int64_t stride_a, double beta, sycl::buffer &c, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size); } -void syr2k(oneapi::mkl::device libkey, sycl::queue 
&queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, sycl::buffer &c, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_ssyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc) { +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_csyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose 
trans, +void syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zsyr2k_sycl(queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_strmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dtrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + 
std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ctrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ztrmm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_strsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dtrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, 
+void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ctrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb) { + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_ztrsm_sycl(queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device 
libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::half beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::half beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, 
float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); } -void gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - float beta, sycl::buffer &c, std::int64_t ldc, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size); 
} -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_strsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size) { function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +void trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, - transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc) { +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, + transpose transb, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_sgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, double beta, sycl::buffer &c, 
std::int64_t ldc) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, double beta, sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_dgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_cgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose transa, +void gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zgemmt_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, 
sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, int8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, int8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void 
gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, - sycl::buffer &b, std::int64_t ldb, uint8_t bo, float beta, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &co) { + sycl::buffer& a, std::int64_t lda, uint8_t ao, + sycl::buffer& b, std::int64_t ldb, uint8_t bo, float beta, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& co) { function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue 
}].row_major_domatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_simatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue 
&queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +void imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, std::int64_t ldb, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t 
stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, - std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, + std::int64_t ldb, std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_sycl( queue, transa, transb, m, 
n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +void omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, + sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size) { function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_somatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_domatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - 
std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_comatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, std::int64_t ldb) { +void omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_zomatcopy_sycl(queue, trans, m, n, alpha, a, lda, b, ldb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_somatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, std::int64_t ldb, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, 
std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_domatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_comatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, std::int64_t stridea, sycl::buffer, 1> &b, +void omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb) { function_tables[{ libkey, queue }].row_major_zomatcopy2_sycl(queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &ab, std::int64_t lda, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_simatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &ab, std::int64_t lda, +void 
imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_dimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_cimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, sycl::buffer, 1> &ab, +void imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb) { function_tables[{ libkey, queue }].row_major_zimatcopy_sycl(queue, trans, m, n, alpha, ab, lda, ldb); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_somatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, double alpha, 
sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc) { +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_domatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_comatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } -void omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +void omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc) { + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc) { function_tables[{ libkey, queue }].row_major_zomatadd_sycl(queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc); } // USM APIs -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float 
*result, - const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dzasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event asum(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event asum(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dasum_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float 
alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_usm_sycl(queue, n, alpha, x, incx, y, incy, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - float *alpha, const float **x, 
std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + float* alpha, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - double *alpha, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + double* alpha, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event 
axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex *alpha, const std::complex **x, - std::int64_t *incx, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex* alpha, const std::complex** x, + std::int64_t* incx, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_batch_group_usm_sycl( queue, n, alpha, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, 
dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, +sycl::event axpy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpy_batch_strided_usm_sycl( queue, n, alpha, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_saxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } 
-sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_daxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_caxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event axpby(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event axpby(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zaxpby_usm_sycl( queue, n, alpha, x, incx, beta, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { 
+sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event copy(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zcopy_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const float **x, std::int64_t *incx, float **y, std::int64_t *incy, - std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const float** x, std::int64_t* incx, float** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const double **x, std::int64_t *incx, double **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const double** x, std::int64_t* incx, double** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zcopy_batch_group_usm_sycl( queue, n, x, incx, y, incy, group_count, group_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const float *x, std::int64_t incx, std::int64_t stridex, float *y, +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const float* x, std::int64_t incx, std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const double *x, std::int64_t incx, std::int64_t stridex, double *y, +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const double* x, std::int64_t incx, std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event 
copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ccopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event copy_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zcopy_batch_strided_usm_sycl( queue, n, x, incx, stridex, y, incy, stridey, batch_size, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, float *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* 
y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, double *result, - const std::vector &dependencies) { +sycl::event dot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsdot_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdotc_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdotc_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, 
std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdotu_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event dotu(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *result, - const std::vector &dependencies) { +sycl::event dotu(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdotu_usm_sycl(queue, n, x, incx, y, incy, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_isamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_idamin_usm_sycl(queue, n, x, incx, result, dependencies); } 
-sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_icamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamin(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamin(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_izamin_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_isamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_idamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - 
const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_icamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event iamax(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, std::int64_t *result, - const std::vector &dependencies) { +sycl::event iamax(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, std::int64_t* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_izamax_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, float *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, float* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_scnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, double *result, - const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, double* result, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dznrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, const std::vector &dependencies) { 
+sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_snrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, const std::vector &dependencies) { +sycl::event nrm2(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dnrm2_usm_sycl(queue, n, x, incx, result, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, float c, float s, const std::vector &dependencies) { +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, float c, float s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, float s, - const std::vector &dependencies) { +sycl::event 
rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double c, double s, - const std::vector &dependencies) { +sycl::event rot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdrot_usm_sycl(queue, n, x, incx, y, incy, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, double *a, double *b, double *c, - double *s, const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, double* a, double* b, double* c, + double* s, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, float *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, float* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_crotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotg(oneapi::mkl::device libkey, sycl::queue &queue, std::complex *a, - std::complex *b, double *c, std::complex *s, - const std::vector &dependencies) { +sycl::event rotg(oneapi::mkl::device libkey, sycl::queue& queue, std::complex* a, + std::complex* b, double* c, std::complex* s, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zrotg_usm_sycl(queue, a, b, c, s, dependencies); } -sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srotm_usm_sycl(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotm(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, double *param, - const std::vector &dependencies) { +sycl::event rotm(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotm_usm_sycl(queue, n, x, incx, y, incy, param, dependencies); } -sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, const std::vector &dependencies) { +sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_srotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event rotmg(oneapi::mkl::device 
libkey, sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies) { +sycl::event rotmg(oneapi::mkl::device libkey, sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_drotmg_usm_sycl(queue, d1, d2, x1, y1, param, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t 
n, + std::complex alpha, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event scal(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event scal(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdscal_usm_sycl(queue, n, alpha, x, incx, dependencies); } -sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *result, const std::vector &dependencies) { +sycl::event sdsdot(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* result, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdsdot_usm_sycl(queue, n, sb, x, incx, y, incy, result, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event swap(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event swap(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zswap_usm_sycl(queue, n, x, incx, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, float beta, float *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, 
transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, float beta, float* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, double beta, double *y, - std::int64_t incy, const std::vector &dependencies) { +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, double beta, double* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event 
gbmv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, +sycl::event gbmv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *x, - std::int64_t incx, std::complex beta, std::complex *y, - std::int64_t incy, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* x, + std::int64_t incx, std::complex beta, std::complex* y, + std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgbmv_usm_sycl( queue, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, float alpha, const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event gemv(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float beta, 
float *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double beta, double *y, std::int64_t incy, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, 
std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, x, incx, stridex, beta, y, incy, stridey, batch_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, const float **x, std::int64_t *incx, float *beta, - float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, const float** x, std::int64_t* incx, float* beta, + float** y, 
std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, const double **x, std::int64_t *incx, double *beta, - double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, const double** x, std::int64_t* incx, double* beta, + double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_cgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, - std::complex *beta, std::complex **y, std::int64_t *incy, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemv_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, + std::complex* beta, std::complex** y, std::int64_t* incy, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemv_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, x, incx, beta, y, incy, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const float *a, std::int64_t lda, - std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const float* a, std::int64_t lda, + std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } 
-sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const double *a, std::int64_t lda, - std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const double* a, std::int64_t lda, + std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, - std::int64_t m, std::int64_t n, const std::complex *a, - std::int64_t lda, std::int64_t stridea, const std::complex *x, - std::int64_t incx, 
std::int64_t stridex, std::complex *c, +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, + std::int64_t m, std::int64_t n, const std::complex* a, + std::int64_t lda, std::int64_t stridea, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdgmm_batch_strided_usm_sycl( queue, left_right, m, n, a, lda, stridea, x, incx, stridex, c, ldc, stridec, batch_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const float **a, std::int64_t *lda, - const float **x, std::int64_t *incx, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const float** a, std::int64_t* lda, + const float** x, std::int64_t* incx, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const double **a, std::int64_t *lda, - const double **x, std::int64_t *incx, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const double** a, std::int64_t* lda, + const double** x, std::int64_t* incx, double** c, 
std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ddgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } -sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - std::int64_t *m, std::int64_t *n, const std::complex **a, - std::int64_t *lda, const std::complex **x, std::int64_t *incx, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event dgmm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + std::int64_t* m, std::int64_t* n, const std::complex** a, + std::int64_t* lda, const std::complex** x, std::int64_t* incx, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zdgmm_batch_group_usm_sycl( queue, left_right, m, n, a, lda, x, incx, c, ldc, group_count, group_size, dependencies); } 
-sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event ger(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event ger(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dger_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event gerc(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event gerc(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgerc_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event geru(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event geru(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgeru_usm_sycl( queue, m, n, alpha, x, incx, y, incy, a, lda, 
dependencies); } -sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + 
std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event hemv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hemv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhemv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event her(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, 
std::int64_t n, + double alpha, const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event her2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - std::int64_t lda, const std::vector &dependencies) { +sycl::event her2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event 
hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *a, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event hpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* a, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const std::complex *x, std::int64_t incx, - std::complex *a, const std::vector &dependencies) { +sycl::event hpr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const 
std::complex* x, std::int64_t incx, + std::complex* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::complex alpha, const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, std::complex *a, - const std::vector &dependencies) { +sycl::event hpr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::complex alpha, const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, std::complex* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhpr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, const float* a, std::int64_t lda, const float* x, + 
std::int64_t incx, float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event sbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsbmv_usm_sycl( queue, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, const std::vector &dependencies) { +sycl::event spmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, const double* x, 
std::int64_t incx, double beta, + double* y, std::int64_t incy, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspmv_usm_sycl( queue, upper_lower, n, alpha, a, x, incx, beta, y, incy, dependencies); } -sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, float *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, - const std::vector &dependencies) { +sycl::event spr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspr_usm_sycl(queue, upper_lower, n, alpha, x, incx, a, dependencies); } -sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event spr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, 
const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, const std::vector &dependencies) { +sycl::event spr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dspr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, dependencies); } -sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *a, std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* a, std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event symv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, std::int64_t incy, - const std::vector &dependencies) { +sycl::event symv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsymv_usm_sycl( queue, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy, dependencies); } -sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t 
incx, float *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const double* x, std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr_usm_sycl( queue, upper_lower, n, alpha, x, incx, a, lda, dependencies); } -sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - float alpha, const float *x, std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, const std::vector &dependencies) { +sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + float alpha, const float* x, std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event syr2(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, std::int64_t n, - double alpha, const double *x, std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies) { +sycl::event syr2(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, std::int64_t n, + double alpha, const 
double* x, std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr2_usm_sycl( queue, upper_lower, n, alpha, x, incx, y, incy, a, lda, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const 
std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztbmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const 
double* a, std::int64_t lda, + double* x, std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tbsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztbsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, k, a, lda, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, 
+ const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztpmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device 
libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, float *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, float* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_stpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, double *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, double* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event tpsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event tpsv(oneapi::mkl::device libkey, 
sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztpsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* 
x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trmv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trmv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrmv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const float *a, std::int64_t lda, float *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const float* a, std::int64_t lda, float* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const double *a, std::int64_t lda, double *x, - std::int64_t incx, const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const double* a, std::int64_t lda, double* x, + std::int64_t incx, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_dtrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event trsv(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - diag unit_diag, std::int64_t n, const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies) { +sycl::event trsv(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + diag unit_diag, std::int64_t n, const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsv_usm_sycl( queue, upper_lower, trans, unit_diag, n, a, lda, x, incx, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, double beta, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, double beta, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t 
k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies) { + const sycl::half* a, std::int64_t lda, const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_gemm_f16f16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16 *a, - std::int64_t lda, const bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event gemm(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const bfloat16* a, + std::int64_t lda, const bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_bf16bf16f32_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_chemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event hemm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event hemm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const 
std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zhemm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const std::complex *a, - std::int64_t lda, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const std::complex* a, + std::int64_t lda, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event herk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const std::complex *a, - std::int64_t lda, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event herk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const std::complex* a, + std::int64_t lda, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zherk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event 
her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event her2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event her2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zher2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].row_major_ssymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event symm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event symm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex 
*b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsymm_usm_sycl( queue, left_right, upper_lower, m, n, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, 
std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syrk(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, float *alpha, - const float **a, std::int64_t *lda, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, float* alpha, + const float** a, std::int64_t* lda, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } 
-sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, double *alpha, - const double **a, std::int64_t *lda, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, double* alpha, + const double** a, std::int64_t* lda, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo *upper_lower, - transpose *trans, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, 
std::complex **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo* upper_lower, + transpose* trans, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_batch_group_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, - transpose trans, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float beta, float *c, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, + transpose trans, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + double* c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event syrk_batch(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, std::complex *c, std::int64_t ldc, + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyrk_batch_strided_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, stride_a, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, 
transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - const float *b, std::int64_t ldb, float beta, float *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + const float* b, std::int64_t ldb, float beta, float* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ssyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - const double *b, std::int64_t ldb, double beta, double *c, std::int64_t ldc, - const std::vector &dependencies) { +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + const double* b, std::int64_t ldb, double beta, double* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_csyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, transpose trans, +sycl::event syr2k(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zsyr2k_usm_sycl( queue, upper_lower, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const 
double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trmm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trmm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrmm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, 
std::int64_t ldb, - const std::vector &dependencies) { + const float* a, std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { + const double* a, std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, uplo upper_lower, +sycl::event trsm(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, 
std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsm_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_batch_strided_usm_sycl( queue, 
left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side left_right, +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side left_right, uplo upper_lower, transpose trans, diag unit_diag, std::int64_t m, - std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, + std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsm_batch_strided_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const 
float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_strsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dtrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { 
+sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ctrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue &queue, side *left_right, - uplo *upper_lower, transpose *trans, diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event trsm_batch(oneapi::mkl::device libkey, sycl::queue& queue, side* left_right, + uplo* upper_lower, transpose* trans, diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_ztrsm_batch_group_usm_sycl( queue, left_right, upper_lower, trans, unit_diag, m, n, alpha, a, lda, b, ldb, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const float **a, std::int64_t *lda, const float **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* 
transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const float** a, std::int64_t* lda, const float** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double *alpha, const double **a, std::int64_t *lda, const double **b, - std::int64_t *ldb, double *beta, double **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double* alpha, const double** a, std::int64_t* lda, const double** b, + std::int64_t* ldb, double* beta, double** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **b, std::int64_t *ldb, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* 
transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** b, std::int64_t* ldb, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex *alpha, const std::complex **a, - std::int64_t *lda, const std::complex **b, std::int64_t *ldb, - std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex* alpha, const std::complex** a, + std::int64_t* lda, const std::complex** b, std::int64_t* ldb, + std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - sycl::half *alpha, const sycl::half **a, std::int64_t *lda, - const sycl::half **b, std::int64_t *ldb, sycl::half *beta, sycl::half **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + sycl::half* alpha, const sycl::half** a, std::int64_t* lda, + const sycl::half** b, std::int64_t* ldb, sycl::half* beta, sycl::half** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const sycl::half **a, std::int64_t *lda, const sycl::half **b, - std::int64_t *ldb, float *beta, float **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const sycl::half** a, std::int64_t* lda, const sycl::half** b, + std::int64_t* ldb, float* beta, float** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event 
gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *transa, - transpose *transb, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float *alpha, const std::int8_t **a, std::int64_t *lda, - const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies) { +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* transa, + transpose* transb, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float* alpha, const std::int8_t** a, std::int64_t* lda, + const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_group_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, group_count, group_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, - const float *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, 
std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, + const float* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, - const double *b, std::int64_t ldb, std::int64_t stride_b, double beta, - double *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, + const double* b, std::int64_t ldb, std::int64_t stride_b, double beta, + double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + 
std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, const std::complex *b, std::int64_t ldb, - std::int64_t stride_b, std::complex beta, std::complex *c, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, const std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::half alpha, const sycl::half *a, std::int64_t lda, - std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::half alpha, const sycl::half* a, 
std::int64_t lda, + std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_hgemm_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const sycl::half *a, std::int64_t lda, std::int64_t stride_a, - const sycl::half *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + float alpha, const sycl::half* a, std::int64_t lda, std::int64_t stride_a, + const sycl::half* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_f16f16f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector 
&dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8f32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, const std::int8_t *a, std::int64_t lda, std::int64_t stride_a, - const std::int8_t *b, std::int64_t ldb, std::int64_t stride_b, float beta, - std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + float alpha, const std::int8_t* a, std::int64_t lda, std::int64_t stride_a, + const std::int8_t* b, std::int64_t ldb, std::int64_t stride_b, float beta, + std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_batch_strided_usm_sycl( queue, transa, transb, m, n, k, alpha, a, lda, stride_a, b, ldb, stride_b, beta, c, ldc, stride_c, batch_size, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, std::int64_t ldb, float beta, - float *c, std::int64_t ldc, const std::vector &dependencies) { + const float* a, std::int64_t lda, const float* b, std::int64_t ldb, float 
beta, + float* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_sgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, std::int64_t ldb, double beta, - double *c, std::int64_t ldc, const std::vector &dependencies) { + const double* a, std::int64_t lda, const double* b, std::int64_t ldb, double beta, + double* c, std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue &queue, uplo upper_lower, +sycl::event gemmt(oneapi::mkl::device libkey, sycl::queue& queue, uplo upper_lower, transpose transa, transpose transb, std::int64_t n, std::int64_t k, - 
std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies) { + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zgemmt_usm_sycl( queue, upper_lower, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::int8_t *a, std::int64_t lda, - std::int8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector 
&dependencies) { + std::int64_t k, float alpha, const std::int8_t* a, std::int64_t lda, + std::int8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_s8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::int8_t *b, std::int64_t ldb, std::int8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, const std::int8_t* b, std::int64_t ldb, std::int8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_u8s8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event gemm_bias(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, offset offsetc, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, const std::uint8_t *a, std::int64_t lda, - std::uint8_t ao, const std::uint8_t *b, std::int64_t ldb, std::uint8_t bo, - float beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, - const std::vector &dependencies) { + std::int64_t k, float alpha, const std::uint8_t* a, std::int64_t lda, + std::uint8_t ao, 
const std::uint8_t* b, std::int64_t ldb, std::uint8_t bo, + float beta, std::int32_t* c, std::int64_t ldc, const std::int32_t* co, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_gemm_u8u8s32_bias_usm_sycl( queue, transa, transb, offsetc, m, n, k, alpha, a, lda, ao, b, ldb, bo, beta, c, ldc, co, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, float *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, double *b, std::int64_t ldb, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event 
omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, a, lda, stride_a, b, ldb, stride_b, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].row_major_simatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies) { + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, stride, batch_size, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, + std::complex* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_strided_usm_sycl( queue, trans, m, n, alpha, ab, 
lda, ldb, stride, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, std::int64_t stride_a, float beta, - const float *b, std::int64_t ldb, std::int64_t stride_b, float *c, + const float* a, std::int64_t lda, std::int64_t stride_a, float beta, + const float* b, std::int64_t ldb, std::int64_t stride_b, float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, std::int64_t stride_a, double beta, - const double *b, std::int64_t ldb, std::int64_t stride_b, double *c, + const double* a, std::int64_t lda, std::int64_t stride_a, double beta, + const double* b, std::int64_t ldb, std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, 
transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, + std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies) { + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatadd_batch_strided_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, stride_a, beta, b, ldb, stride_b, c, ldc, stride_c, batch_size, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - float *b, std::int64_t ldb, const std::vector 
&dependencies) { +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + float* b, std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies) { +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, std::int64_t stridea, double *b, std::int64_t ldb, - std::int64_t strideb, const std::vector &dependencies) { +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, std::int64_t stridea, double* b, std::int64_t ldb, + std::int64_t strideb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const 
std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event omatcopy2(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - std::complex *b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::int64_t stridea, + std::complex* b, std::int64_t ldb, std::int64_t strideb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy2_usm_sycl( queue, trans, m, n, alpha, a, lda, stridea, b, ldb, strideb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, float alpha, float *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, float alpha, float* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_simatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, - std::int64_t m, std::int64_t n, double alpha, double *ab, std::int64_t lda, - std::int64_t ldb, const std::vector &dependencies) { +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, + std::int64_t m, std::int64_t n, double alpha, double* ab, std::int64_t lda, + std::int64_t ldb, const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue &queue, transpose trans, +sycl::event imatcopy(oneapi::mkl::device libkey, sycl::queue& queue, transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies) { + std::complex* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, std::int64_t ldb, float *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, std::int64_t ldb, float* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event 
omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, - transpose transb, std::int64_t m, std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, std::int64_t ldb, double *c, - std::int64_t ldc, const std::vector &dependencies) { +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, + transpose transb, std::int64_t m, std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, std::int64_t ldb, double* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue &queue, transpose transa, +sycl::event omatadd(oneapi::mkl::device libkey, sycl::queue& queue, transpose transa, transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, std::complex *c, - std::int64_t ldc, const std::vector &dependencies) { + const std::complex* a, std::int64_t lda, std::complex beta, + const 
std::complex* b, std::int64_t ldb, std::complex* c, + std::int64_t ldc, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatadd_usm_sycl( queue, transa, transb, m, n, alpha, a, lda, beta, b, ldb, c, ldc, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, const float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, const float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_somatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, const double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, const double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_domatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, 
std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_comatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event omatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zomatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, a, lda, b, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, float *alpha, float **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, float* alpha, float** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const 
std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_simatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, double *alpha, double **ab, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, double* alpha, double** ab, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_dimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { +sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_cimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } -sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue &queue, transpose *trans, - std::int64_t *m, std::int64_t *n, std::complex *alpha, - std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies) { 
+sycl::event imatcopy_batch(oneapi::mkl::device libkey, sycl::queue& queue, transpose* trans, + std::int64_t* m, std::int64_t* n, std::complex* alpha, + std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].row_major_zimatcopy_batch_group_usm_sycl( queue, trans, m, n, alpha, ab, lda, ldb, group_count, groupsize, dependencies); } diff --git a/src/blas/function_table.hpp b/src/blas/function_table.hpp index a242fd0c0..c821a4a51 100644 --- a/src/blas/function_table.hpp +++ b/src/blas/function_table.hpp @@ -34,4940 +34,4940 @@ typedef struct { // Buffer APIs - void (*column_major_scasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_dzasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_sasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_dasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_saxpy_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*column_major_daxpy_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*column_major_caxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zaxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_saxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, float 
alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_scasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_dzasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_sasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_dasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_saxpy_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*column_major_daxpy_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*column_major_caxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zaxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_saxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_daxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_daxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void 
(*column_major_caxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*column_major_caxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_zaxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*column_major_zaxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_saxpby_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_daxpby_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_caxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + void (*column_major_saxpby_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_daxpby_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_caxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zaxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void 
(*column_major_zaxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_scopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_scopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_dcopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_dcopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_ccopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zcopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_scopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*column_major_ccopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zcopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_scopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_dcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void 
(*column_major_dcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_ccopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*column_major_ccopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_zcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*column_major_zcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_sdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_ddot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_dsdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_cdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_zdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_cdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 
1> &result); - void (*column_major_zdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*column_major_isamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_idamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_icamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_izamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_isamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_idamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_icamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_izamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_scnrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_dznrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*column_major_snrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_dnrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*column_major_srot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void 
(*column_major_sdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_ddot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_dsdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_cdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_zdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_cdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_zdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*column_major_isamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_idamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_icamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_izamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_isamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_idamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + 
std::int64_t incx, sycl::buffer& result); + void (*column_major_icamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_izamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_scnrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_dznrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*column_major_snrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_dnrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*column_major_srot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); - void (*column_major_drot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void (*column_major_drot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); - void (*column_major_csrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*column_major_csrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s); - void (*column_major_zdrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_zdrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s); - void (*column_major_srotg_sycl)(sycl::queue 
&queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*column_major_drotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*column_major_crotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*column_major_zrotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*column_major_srotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer ¶m); - void (*column_major_drotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer ¶m); - void (*column_major_srotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - float y1, sycl::buffer ¶m); - void (*column_major_drotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer ¶m); - void (*column_major_sscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); - void (*column_major_cscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_csscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_zscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_zdscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_sdsdot_sycl)(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - 
sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*column_major_sswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_srotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*column_major_drotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*column_major_crotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*column_major_zrotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*column_major_srotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*column_major_drotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& param); + void (*column_major_srotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + float y1, sycl::buffer& param); + void (*column_major_drotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + double y1, sycl::buffer& param); + void (*column_major_sscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); + void (*column_major_cscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_csscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_zscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void 
(*column_major_zdscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_sdsdot_sycl)(sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*column_major_sswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_dswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, + void (*column_major_dswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); - void (*column_major_cswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_cswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_dgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t ku, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_dgbmv_sycl)(sycl::queue& 
queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_cgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t ku, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_cgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t 
incy); + void (*column_major_dgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_cgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_cgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_sgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - float beta, sycl::buffer &y, + float beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_dgemv_batch_strided_sycl)(sycl::queue 
&queue, oneapi::mkl::transpose trans, + void (*column_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - double beta, sycl::buffer &y, + double beta, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*column_major_cgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*column_major_zgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*column_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, 
oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*column_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*column_major_cdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*column_major_zdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, 
std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*column_major_sger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_dger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_cgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*column_major_sger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_dger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_cgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_cgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); 
+ void (*column_major_cgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_chbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_chbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_chemv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_chemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - 
sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhemv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_cher_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_cher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zher_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_zher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_cher2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_cher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_zher2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& 
a, std::int64_t lda); + void (*column_major_zher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*column_major_chpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*column_major_chpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_zhpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_zhpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*column_major_chpr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& y, std::int64_t incy); + void (*column_major_chpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - void (*column_major_zhpr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& a); + void (*column_major_zhpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &a); - void (*column_major_chpr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + 
sycl::buffer, 1>& a); + void (*column_major_chpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*column_major_zhpr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*column_major_zhpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*column_major_ssbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*column_major_ssbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dsbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_dsbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_sspmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*column_major_dspmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - 
sycl::buffer &y, std::int64_t incy); - void (*column_major_sspr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); - void (*column_major_dspr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a); - void (*column_major_sspr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*column_major_dspr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a); - void (*column_major_ssymv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_dsymv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*column_major_ssyr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - void (*column_major_dsyr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &a, std::int64_t lda); - void (*column_major_ssyr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*column_major_dsyr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double 
alpha, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, - std::int64_t incy, sycl::buffer &a, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_sspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_dspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*column_major_sspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a); + void (*column_major_dspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a); + void (*column_major_sspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*column_major_dspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a); + void (*column_major_ssymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_dsymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*column_major_ssyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo 
upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda); + void (*column_major_dsyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& a, std::int64_t lda); + void (*column_major_ssyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*column_major_dsyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, + std::int64_t incy, sycl::buffer& a, std::int64_t lda); - void (*column_major_stbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_stbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*column_major_dtbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*column_major_dtbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); - void (*column_major_ctbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_ctbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void 
(*column_major_ztbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*column_major_dtbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*column_major_dtbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); - void (*column_major_ctbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_ctbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose 
trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_stpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_stpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag 
unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_ztpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_ztpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*column_major_strmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*column_major_strmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtrmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctrmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_ztrmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_ztrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_strsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_strsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_dtrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_dtrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*column_major_ctrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*column_major_ctrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, 
oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_ztrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_ztrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*column_major_sgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_sgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_dgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_dgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_cgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_cgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, 
std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_hgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_hgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); - void (*column_major_gemm_f16f16f32_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); - void (*column_major_gemm_bf16bf16f32_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, + sycl::buffer& a, std::int64_t lda, - sycl::buffer &b, + sycl::buffer& b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_chemm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_chemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zhemm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zhemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_cherk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_cherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zherk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, 
sycl::buffer, 1> &a, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_cher2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_cher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + float beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*column_major_zher2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_zher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + double beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*column_major_ssymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*column_major_ssymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_dsymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, 
std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_csymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_csymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_ssyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_ssyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*column_major_dsyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &c, 
std::int64_t ldc); - void (*column_major_csyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*column_major_csyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_ssyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_dsyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t 
batch_size); void (*column_major_csyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zsyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_ssyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_dsyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, 
std::int64_t ldc); - void (*column_major_csyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_csyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zsyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_strmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_strmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_dtrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_dtrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, 
std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_ctrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_ctrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_ztrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_ztrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_strsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_strsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_dtrsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_dtrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_ctrsm_sycl)(sycl::queue &queue, 
oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_ctrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_ztrsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_ztrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); void (*column_major_sgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_dgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t 
ldb, - std::int64_t stride_b, double beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, double beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_cgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_hgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void 
(*column_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_gemm_f16f16f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_gemm_s8s8f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void 
(*column_major_gemm_s8s8s32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_strsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_dtrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_ctrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo 
upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_ztrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*column_major_sgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*column_major_sgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*column_major_dgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_dgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer 
&c, std::int64_t ldc); - void (*column_major_cgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*column_major_cgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); void (*column_major_gemm_s8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_gemm_s8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_gemm_u8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_gemm_u8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*column_major_somatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t 
stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_domatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_comatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*column_major_zomatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*column_major_simatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_simatcopy_batch_strided_sycl)(sycl::queue& 
queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_dimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_cimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*column_major_zimatcopy_batch_strided_sycl)(sycl::queue &queue, + void (*column_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); void (*column_major_somatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, 
std::int64_t batch_size); void (*column_major_domatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_comatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*column_major_zomatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 
1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*column_major_somatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_somatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_domatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_domatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*column_major_comatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*column_major_comatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_zomatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*column_major_somatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*column_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, 
std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_domatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_comatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_zomatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*column_major_simatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_simatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_dimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, 
std::int64_t lda, std::int64_t ldb); - void (*column_major_cimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_zimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*column_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*column_major_somatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_somatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, float beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, float beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); - void (*column_major_domatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_domatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, double beta, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &c, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, double beta, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& c, std::int64_t ldc); - void (*column_major_comatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*column_major_comatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 
1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*column_major_zomatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*column_major_zomatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs - sycl::event (*column_major_scasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*column_major_dzasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_sasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*column_major_dasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*column_major_saxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, + sycl::event (*column_major_scasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_dzasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + 
sycl::event (*column_major_sasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*column_major_dasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*column_major_saxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_daxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, + const std::vector& dependencies); + sycl::event (*column_major_daxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_caxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event (*column_major_caxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zaxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zaxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*column_major_saxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, 
std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_daxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_caxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zaxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_saxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t 
incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_daxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_caxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_zaxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*column_major_saxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_daxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, - const double beta, double 
*y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_caxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event (*column_major_saxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_daxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, + const double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_caxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zaxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zaxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_scopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_ccopy_usm_sycl)(sycl::queue &queue, 
std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_scopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_ccopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*column_major_scopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, 
std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ccopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_zcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_scopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_dcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); 
sycl::event (*column_major_ccopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*column_major_sdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies); - sycl::event (*column_major_ddot_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_dsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_cdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_zdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_cdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const 
std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_zdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*column_major_isamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_idamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_icamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_izamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_isamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_idamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_icamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_izamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*column_major_scnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event 
(*column_major_dznrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*column_major_snrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*column_major_dnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*column_major_srot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*column_major_sdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_ddot_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_dsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_cdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_zdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event 
(*column_major_cdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_zdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*column_major_isamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_idamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_icamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_izamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_isamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_idamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_icamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_izamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*column_major_scnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const 
std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*column_major_dznrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*column_major_snrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*column_major_dnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*column_major_srot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, float s, - const std::vector &dependencies); - sycl::event (*column_major_drot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, + const std::vector& dependencies); + sycl::event (*column_major_drot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, double c, double s, - const std::vector &dependencies); - sycl::event (*column_major_csrot_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_csrot_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, float s, - const std::vector &dependencies); - sycl::event (*column_major_zdrot_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zdrot_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies); - sycl::event 
(*column_major_srotg_usm_sycl)(sycl::queue &queue, float *a, float *b, float *c, - float *s, - const std::vector &dependencies); - sycl::event (*column_major_drotg_usm_sycl)(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies); - sycl::event (*column_major_crotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*column_major_zrotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*column_major_srotm_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies); - sycl::event (*column_major_drotm_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies); - sycl::event (*column_major_srotmg_usm_sycl)(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies); - sycl::event (*column_major_drotmg_usm_sycl)(sycl::queue &queue, double *d1, double *d2, - double *x1, double y1, double *param, - const std::vector &dependencies); - sycl::event (*column_major_sscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_cscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*column_major_srotg_usm_sycl)(sycl::queue& queue, float* a, float* b, float* c, + float* s, + const std::vector& dependencies); + sycl::event 
(*column_major_drotg_usm_sycl)(sycl::queue& queue, double* a, double* b, double* c, + double* s, + const std::vector& dependencies); + sycl::event (*column_major_crotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*column_major_zrotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*column_major_srotm_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + float* param, + const std::vector& dependencies); + sycl::event (*column_major_drotm_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + double* param, + const std::vector& dependencies); + sycl::event (*column_major_srotmg_usm_sycl)(sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, + const std::vector& dependencies); + sycl::event (*column_major_drotmg_usm_sycl)(sycl::queue& queue, double* d1, double* d2, + double* x1, double y1, double* param, + const std::vector& dependencies); + sycl::event (*column_major_sscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_cscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_csscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*column_major_csscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector 
&dependencies); - sycl::event (*column_major_zscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_zdscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_sdsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies); - sycl::event (*column_major_sswap_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dswap_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_cswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_zscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_zdscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_sdsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies); 
+ sycl::event (*column_major_sswap_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dswap_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_cswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_zswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); + std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*column_major_cgbmv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - 
std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*column_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double 
beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_zgemv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_sgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t stridex, float beta, float *y, std::int64_t incy, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy, 
std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_dgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, std::int64_t incy, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex beta, std::complex *y, std::int64_t incy, + sycl::queue& queue, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_sgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_dgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_cgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t 
*incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_zgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_sdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_ddgmm_batch_strided_usm_sycl)( - 
sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_zdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t 
ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_sdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ddgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_cdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); 
sycl::event (*column_major_zdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); - sycl::event (*column_major_sger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); + sycl::event (*column_major_sger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, + const std::vector& dependencies); + sycl::event (*column_major_dger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*column_major_cgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - 
sycl::event (*column_major_zgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_cgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); sycl::event (*column_major_chbmv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t 
incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*column_major_zhbmv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*column_major_chemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*column_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, 
+ std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_cher_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_cher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zher_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_cher2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_zher2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event 
(*column_major_chpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_chpr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event 
(*column_major_chpr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*column_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t 
lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_sspr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies); - sycl::event (*column_major_dspr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies); - sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, - const std::vector &dependencies); - sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, - std::int64_t incy, double *a, - const std::vector &dependencies); - sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - 
std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, - std::int64_t incy, double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, + const std::vector& dependencies); + 
sycl::event (*column_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, + const std::vector& dependencies); + sycl::event (*column_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, + const std::vector& dependencies); + sycl::event (*column_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, + std::int64_t incy, double* a, + const std::vector& dependencies); + sycl::event (*column_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*column_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies); + 
sycl::event (*column_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, + std::int64_t incy, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*column_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const 
std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag 
unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event 
(*column_major_stpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_strmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event 
(*column_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_strsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, 
oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*column_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - 
const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, + const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::vector& dependencies); + sycl::event (*column_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cgemm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_zgemm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue &queue, 
oneapi::mkl::transpose transa, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_hgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, - sycl::half beta, sycl::half *c, std::int64_t ldc, - const std::vector &dependencies); + const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, + sycl::half beta, sycl::half* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*column_major_gemm_bf16bf16f32_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16 *a, - std::int64_t lda, const oneapi::mkl::bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16* a, + std::int64_t lda, const 
oneapi::mkl::bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*column_major_chemm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_cherk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_zherk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const 
std::vector& dependencies); + sycl::event (*column_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, - double beta, std::complex *c, + const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_cher2k_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, float beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, float beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); sycl::event (*column_major_zher2k_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, double beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, double beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, - std::int64_t 
n, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_csymm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, 
std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ssyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, float beta, float *c, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, 
std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_ssyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_dsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_csyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - 
const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_ssyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, 
- std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_csyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_zsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*column_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo 
upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_csyr2k_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, 
std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_strmm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + const std::vector& dependencies); + sycl::event (*column_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_ctrmm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& 
dependencies); sycl::event (*column_major_ztrmm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); - sycl::event (*column_major_strsm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); + sycl::event (*column_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_ctrsm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, 
std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*column_major_ztrsm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*column_major_strsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_dtrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, 
std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_ctrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_ztrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_strsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, 
oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dtrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_ctrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_ztrsm_batch_group_usm_sycl)( - sycl::queue &queue, 
oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*column_major_sgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_dgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, 
oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_cgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_zgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& 
dependencies); sycl::event (*column_major_hgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, 
std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*column_major_sgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float 
beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_hgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_gemm_f16f16f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, 
const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*column_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_cgemmt_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const 
std::vector& dependencies); sycl::event (*column_major_zgemmt_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_gemm_s8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_s8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* 
a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_u8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_gemm_u8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*column_major_somatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + sycl::queue& queue, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_domatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*column_major_comatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zomatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, 
std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_simatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_dimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_cimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_zimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector 
&dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_somatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_domatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*column_major_comatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, 
oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*column_major_zomatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); - sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::event (*column_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue &queue, 
oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*column_major_somatcopy2_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, float *b, - std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, float* b, + std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies); sycl::event (*column_major_domatcopy2_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, 
std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, double *b, - std::int64_t ldb, std::int64_t strideb, const std::vector &dependencies); - sycl::event (*column_major_comatcopy2_usm_sycl)(sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, double* b, + std::int64_t ldb, std::int64_t strideb, const std::vector& dependencies); + sycl::event (*column_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*column_major_zomatcopy2_usm_sycl)(sycl::queue &queue, + const std::vector& dependencies); + sycl::event (*column_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_dimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event 
(*column_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*column_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*column_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::vector& dependencies); + sycl::event (*column_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*column_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t 
n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_comatadd_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*column_major_zomatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*column_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, + const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*column_major_somatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, 
oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_domatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_comatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_zomatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + 
std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_simatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_dimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*column_major_cimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*column_major_zimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, - 
const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); // Buffer APIs - void (*row_major_scasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_dzasum_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_sasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_dasum_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_saxpy_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*row_major_daxpy_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy); - void (*row_major_caxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zaxpy_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_saxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_scasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_dzasum_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_sasum_sycl)(sycl::queue& queue, std::int64_t n, 
sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_dasum_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_saxpy_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*row_major_daxpy_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy); + void (*row_major_caxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zaxpy_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_saxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_daxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_daxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_caxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void (*row_major_caxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_zaxpy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, + void 
(*row_major_zaxpy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_saxpby_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_daxpby_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_caxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + void (*row_major_saxpby_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_daxpby_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_caxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zaxpby_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, + void (*row_major_zaxpby_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_scopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_dcopy_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_ccopy_sycl)(sycl::queue &queue, 
std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zcopy_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_scopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_scopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_dcopy_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_ccopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zcopy_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_scopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_dcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer &x, std::int64_t incx, - std::int64_t stridex, sycl::buffer &y, + void (*row_major_dcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer& x, std::int64_t incx, + std::int64_t stridex, sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_ccopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*row_major_ccopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, 
std::int64_t batch_size); - void (*row_major_zcopy_batch_strided_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, + void (*row_major_zcopy_batch_strided_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &y, + sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_sdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_ddot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_dsdot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_cdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_zdotc_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_cdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_zdotu_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &result); - void (*row_major_isamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_idamin_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_icamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - 
void (*row_major_izamin_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_isamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_idamax_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_icamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_izamax_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_scnrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_dznrm2_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer &result); - void (*row_major_snrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_dnrm2_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &result); - void (*row_major_srot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, float c, + void (*row_major_sdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_ddot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_dsdot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_cdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + 
sycl::buffer, 1>& result); + void (*row_major_zdotc_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_cdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_zdotu_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& result); + void (*row_major_isamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_idamin_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_icamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_izamin_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_isamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_idamax_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_icamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_izamax_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_scnrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_dznrm2_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer& result); + void (*row_major_snrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, 
sycl::buffer& result); + void (*row_major_dnrm2_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& result); + void (*row_major_srot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, float c, float s); - void (*row_major_drot_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, + void (*row_major_drot_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, double c, double s); - void (*row_major_csrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*row_major_csrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, float c, float s); - void (*row_major_zdrot_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, + void (*row_major_zdrot_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, double c, double s); - void (*row_major_srotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*row_major_drotg_sycl)(sycl::queue &queue, sycl::buffer &a, - sycl::buffer &b, sycl::buffer &c, - sycl::buffer &s); - void (*row_major_crotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, sycl::buffer &c, - sycl::buffer, 1> &s); - void (*row_major_zrotg_sycl)(sycl::queue &queue, sycl::buffer, 1> &a, - sycl::buffer, 1> &b, - sycl::buffer &c, - sycl::buffer, 1> &s); - void (*row_major_srotm_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &param); - void (*row_major_drotm_sycl)(sycl::queue &queue, std::int64_t n, 
sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy, - sycl::buffer &param); - void (*row_major_srotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, float y1, - sycl::buffer &param); - void (*row_major_drotmg_sycl)(sycl::queue &queue, sycl::buffer &d1, - sycl::buffer &d2, sycl::buffer &x1, - double y1, sycl::buffer &param); - void (*row_major_sscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx); - void (*row_major_cscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_csscal_sycl)(sycl::queue &queue, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_zscal_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_zdscal_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_sdsdot_sycl)(sycl::queue &queue, std::int64_t n, float sb, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &result); - void (*row_major_sswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_dswap_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &x, - std::int64_t incx, sycl::buffer &y, std::int64_t incy); - void (*row_major_cswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zswap_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + void 
(*row_major_srotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*row_major_drotg_sycl)(sycl::queue& queue, sycl::buffer& a, + sycl::buffer& b, sycl::buffer& c, + sycl::buffer& s); + void (*row_major_crotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, sycl::buffer& c, + sycl::buffer, 1>& s); + void (*row_major_zrotg_sycl)(sycl::queue& queue, sycl::buffer, 1>& a, + sycl::buffer, 1>& b, + sycl::buffer& c, + sycl::buffer, 1>& s); + void (*row_major_srotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*row_major_drotm_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy, + sycl::buffer& param); + void (*row_major_srotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, float y1, + sycl::buffer& param); + void (*row_major_drotmg_sycl)(sycl::queue& queue, sycl::buffer& d1, + sycl::buffer& d2, sycl::buffer& x1, + double y1, sycl::buffer& param); + void (*row_major_sscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx); + void (*row_major_cscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_csscal_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_zscal_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_zdscal_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_sdsdot_sycl)(sycl::queue& queue, std::int64_t n, float sb, + sycl::buffer& x, std::int64_t incx, + 
sycl::buffer& y, std::int64_t incy, + sycl::buffer& result); + void (*row_major_sswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_dswap_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& x, + std::int64_t incx, sycl::buffer& y, std::int64_t incy); + void (*row_major_cswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zswap_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_dgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_dgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_cgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_cgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t 
incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zgbmv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zgbmv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_dgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_cgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_dgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_cgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, 
sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zgemv_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, + void (*row_major_zgemv_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_sgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_sgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, float beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_dgemv_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_dgemv_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, double beta, - sycl::buffer &y, std::int64_t incy, + sycl::buffer& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*row_major_cgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - 
std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); void (*row_major_zgemv_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &x, std::int64_t incx, - std::int64_t stridex, std::complex beta, sycl::buffer, 1> &y, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& x, std::int64_t incx, + std::int64_t stridex, std::complex beta, sycl::buffer, 1>& y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size); - void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_sdgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_ddgmm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &x, + sycl::buffer& a, std::int64_t 
lda, + std::int64_t stridea, sycl::buffer& x, std::int64_t incx, std::int64_t stridex, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*row_major_cdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); void (*row_major_zdgmm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &x, std::int64_t incx, std::int64_t stridex, - sycl::buffer, 1> &c, std::int64_t ldc, std::int64_t stridec, + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, + sycl::buffer, 1>& x, std::int64_t incx, std::int64_t stridex, + sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size); - void (*row_major_sger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dger_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_cgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - 
std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + void (*row_major_sger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dger_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double alpha, + sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_cgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zgerc_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*row_major_zgerc_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_cgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*row_major_cgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zgeru_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + void (*row_major_zgeru_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_chbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo 
upper_lower, std::int64_t n, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*row_major_chbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zhbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + void (*row_major_zhbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_chemv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_chemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_zhemv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_zhemv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - 
void (*row_major_cher_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_cher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zher_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a, + void (*row_major_zher_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_cher2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a, + void (*row_major_cher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a, std::int64_t lda); - void (*row_major_zher2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + void (*row_major_zher2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a, std::int64_t lda); - void (*row_major_chpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, - std::complex beta, sycl::buffer, 1> &y, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a, std::int64_t lda); + void (*row_major_chpmv_sycl)(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, + std::complex beta, sycl::buffer, 1>& y, std::int64_t incy); - void (*row_major_zhpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + void (*row_major_zhpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx, + sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx, std::complex beta, - sycl::buffer, 1> &y, std::int64_t incy); - void (*row_major_chpr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - void (*row_major_zhpr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &a); - void (*row_major_chpr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &x, - std::int64_t incx, sycl::buffer, 1> &y, - std::int64_t incy, sycl::buffer, 1> &a); - void (*row_major_zhpr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + sycl::buffer, 1>& y, std::int64_t incy); + void (*row_major_chpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); + void (*row_major_zhpr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& a); + void (*row_major_chpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& x, + std::int64_t incx, sycl::buffer, 1>& y, + std::int64_t incy, sycl::buffer, 1>& a); + void (*row_major_zhpr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, 
std::complex alpha, - sycl::buffer, 1> &x, std::int64_t incx, - sycl::buffer, 1> &y, std::int64_t incy, - sycl::buffer, 1> &a); - void (*row_major_ssbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - float beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_dsbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx, - double beta, sycl::buffer &y, std::int64_t incy); - void (*row_major_sspmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, sycl::buffer &x, - std::int64_t incx, float beta, sycl::buffer &y, + sycl::buffer, 1>& x, std::int64_t incx, + sycl::buffer, 1>& y, std::int64_t incy, + sycl::buffer, 1>& a); + void (*row_major_ssbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + float beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_dsbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx, + double beta, sycl::buffer& y, std::int64_t incy); + void (*row_major_sspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, sycl::buffer& x, + std::int64_t incx, float beta, sycl::buffer& y, std::int64_t incy); - void (*row_major_dspmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_sspr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, 
sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - void (*row_major_dspr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a); - void (*row_major_sspr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*row_major_dspr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a); - void (*row_major_ssymv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, float beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_dsymv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx, double beta, - sycl::buffer &y, std::int64_t incy); - void (*row_major_ssyr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dsyr_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &a, std::int64_t lda); - void (*row_major_ssyr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - float alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_dsyr2_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, - double alpha, sycl::buffer &x, std::int64_t incx, - sycl::buffer &y, std::int64_t incy, - sycl::buffer &a, std::int64_t lda); - void (*row_major_stbmv_sycl)(sycl::queue &queue, 
oneapi::mkl::uplo upper_lower, + void (*row_major_dspmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_sspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + void (*row_major_dspr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a); + void (*row_major_sspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*row_major_dspr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a); + void (*row_major_ssymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, float beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_dsymv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx, double beta, + sycl::buffer& y, std::int64_t incy); + void (*row_major_ssyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dsyr_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& a, std::int64_t lda); + void (*row_major_ssyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + float alpha, sycl::buffer& x, std::int64_t 
incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_dsyr2_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, + double alpha, sycl::buffer& x, std::int64_t incx, + sycl::buffer& y, std::int64_t incy, + sycl::buffer& a, std::int64_t lda); + void (*row_major_stbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_dtbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_dtbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_ctbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_ctbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztbmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztbmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_stbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_dtbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_dtbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &x, std::int64_t incx); - void (*row_major_ctbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& x, std::int64_t incx); + void (*row_major_ctbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztbsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztbsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, std::int64_t k, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_stpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void 
(*row_major_dtpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztpmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztpmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_stpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_stpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, + 
sycl::buffer& x, std::int64_t incx); + void (*row_major_ctpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_ztpsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_ztpsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - sycl::buffer, 1> &x, std::int64_t incx); - void (*row_major_strmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer, 1>& a, + sycl::buffer, 1>& x, std::int64_t incx); + void (*row_major_strmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtrmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctrmv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_ztrmv_sycl)(sycl::queue &queue, 
oneapi::mkl::uplo upper_lower, + void (*row_major_ztrmv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_strsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_strsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_dtrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_dtrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &x, std::int64_t incx); - void (*row_major_ctrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& x, std::int64_t incx); + void (*row_major_ctrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void (*row_major_ztrsv_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_ztrsv_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, - std::int64_t n, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &x, + std::int64_t n, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& x, std::int64_t incx); - void 
(*row_major_sgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*row_major_sgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_dgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t k, float alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_dgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, double alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_cgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t k, double alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_cgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*row_major_zgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& 
b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_hgemm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_hgemm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::half alpha, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::half beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_gemm_f16f16f32_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t k, sycl::half alpha, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::half beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_gemm_f16f16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_gemm_bf16bf16f32_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, + sycl::buffer& a, std::int64_t lda, - sycl::buffer &b, - std::int64_t ldb, float beta, sycl::buffer &c, + sycl::buffer& b, + std::int64_t ldb, float beta, sycl::buffer& c, std::int64_t ldc); - void (*row_major_chemm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_chemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + 
std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zhemm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zhemm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_cherk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_cherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer, 1> &a, + float alpha, sycl::buffer, 1>& a, std::int64_t lda, float beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zherk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zherk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer, 1> &a, + double alpha, sycl::buffer, 1>& a, std::int64_t lda, double beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_cher2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_cher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - float beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + float 
beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zher2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_zher2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - double beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + double beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_ssymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_ssymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_csymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_csymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> 
&c, std::int64_t ldc); - void (*row_major_zsymm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zsymm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_ssyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_ssyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + float beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &c, std::int64_t ldc); - void (*row_major_csyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& c, std::int64_t ldc); + void (*row_major_csyrk_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - std::complex alpha, sycl::buffer, 1> &a, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zsyrk_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zsyrk_sycl)(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_ssyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_dsyrk_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_csyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zsyrk_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + sycl::queue& 
queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_ssyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_ssyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dsyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_csyr2k_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_csyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zsyr2k_sycl)(sycl::queue &queue, 
oneapi::mkl::uplo upper_lower, + void (*row_major_zsyr2k_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_strmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_strmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_dtrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_dtrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_ctrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_ctrmm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); - void (*row_major_ztrmm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_ztrmm_sycl)(sycl::queue& queue, 
oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_strsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_strsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_dtrsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + float alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_dtrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_ctrsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + double alpha, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_ctrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, - std::int64_t lda, sycl::buffer, 1> &b, + std::complex alpha, sycl::buffer, 1>& a, + std::int64_t lda, sycl::buffer, 1>& b, std::int64_t ldb); - void (*row_major_ztrsm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + void (*row_major_ztrsm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, 
std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_sgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_sgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_dgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*row_major_dgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, double beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_cgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, 
std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zgemm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer, 1> &b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_hgemm_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*row_major_hgemm_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, sycl::half beta, - sycl::buffer &c, std::int64_t ldc, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_f16f16f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, float beta, sycl::buffer &c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, 
sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, float beta, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_s8s8f32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_gemm_s8s8s32_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, float beta, - sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, float beta, + sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_strsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, 
std::int64_t ldb, std::int64_t stride_b, + float alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_dtrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, + double alpha, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_ctrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_ztrsm_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - 
void (*row_major_sgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_sgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, float beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_dgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, float beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_dgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, double beta, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_cgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, double beta, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_cgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, - std::complex beta, sycl::buffer, 1> &c, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb, + std::complex beta, sycl::buffer, 1>& c, std::int64_t ldc); - void (*row_major_zgemmt_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + void (*row_major_zgemmt_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& a, std::int64_t lda, + 
sycl::buffer, 1>& b, std::int64_t ldb, std::complex beta, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& c, std::int64_t ldc); void (*row_major_gemm_s8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_s8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, int8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, int8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_u8s8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, int8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, int8_t bo, float beta, sycl::buffer& c, 
std::int64_t ldc, + sycl::buffer& co); void (*row_major_gemm_u8u8s32_bias_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - sycl::buffer &a, std::int64_t lda, uint8_t ao, sycl::buffer &b, - std::int64_t ldb, uint8_t bo, float beta, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &co); - void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, uint8_t ao, sycl::buffer& b, + std::int64_t ldb, uint8_t bo, float beta, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& co); + void (*row_major_somatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_domatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_comatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t 
lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); void (*row_major_zomatcopy_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer, 1> &b, std::int64_t ldb, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_simatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_dimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_cimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); - void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_zimatcopy_batch_strided_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, 
std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, std::int64_t batch_size); void (*row_major_somatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, float alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, float beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, float alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, float beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_domatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, double alpha, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, double beta, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer &c, std::int64_t ldc, std::int64_t stride_c, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, double alpha, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, double beta, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_comatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - 
std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); void (*row_major_zomatadd_batch_strided_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, std::int64_t stride_a, - std::complex beta, sycl::buffer, 1> &b, std::int64_t ldb, - std::int64_t stride_b, sycl::buffer, 1> &c, std::int64_t ldc, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stride_a, + std::complex beta, sycl::buffer, 1>& b, std::int64_t ldb, + std::int64_t stride_b, sycl::buffer, 1>& c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size); - void (*row_major_somatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_somatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_domatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_domatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb); - void (*row_major_comatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb); + void (*row_major_comatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - 
sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_zomatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_zomatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - sycl::buffer, 1> &b, std::int64_t ldb); - void (*row_major_somatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::buffer, 1>& a, std::int64_t lda, + sycl::buffer, 1>& b, std::int64_t ldb); + void (*row_major_somatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_domatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_domatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer &b, + sycl::buffer& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_comatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_comatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, - std::int64_t stridea, sycl::buffer, 1> &b, + sycl::buffer, 1>& a, std::int64_t lda, + std::int64_t stridea, sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_zomatcopy2_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_zomatcopy2_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex 
alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::int64_t stridea, - sycl::buffer, 1> &b, std::int64_t ldb, + sycl::buffer, 1>& b, std::int64_t ldb, std::int64_t strideb); - void (*row_major_simatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_simatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_dimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_dimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - sycl::buffer &ab, std::int64_t lda, + sycl::buffer& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_cimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_cimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_zimatcopy_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + void (*row_major_zimatcopy_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &ab, std::int64_t lda, + sycl::buffer, 1>& ab, std::int64_t lda, std::int64_t ldb); - void (*row_major_somatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + void (*row_major_somatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - float alpha, sycl::buffer &a, std::int64_t lda, - float beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_domatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + float alpha, sycl::buffer& a, std::int64_t lda, + float 
beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_domatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - double alpha, sycl::buffer &a, std::int64_t lda, - double beta, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &c, std::int64_t ldc); - void (*row_major_comatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + double alpha, sycl::buffer& a, std::int64_t lda, + double beta, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& c, std::int64_t ldc); + void (*row_major_comatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); - void (*row_major_zomatadd_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); + void (*row_major_zomatadd_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - sycl::buffer, 1> &a, std::int64_t lda, + sycl::buffer, 1>& a, std::int64_t lda, std::complex beta, - sycl::buffer, 1> &b, std::int64_t ldb, - sycl::buffer, 1> &c, std::int64_t ldc); + sycl::buffer, 1>& b, std::int64_t ldb, + sycl::buffer, 1>& c, std::int64_t ldc); // USM APIs - sycl::event (*row_major_scasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_dzasum_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_sasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const float 
*x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*row_major_dasum_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*row_major_saxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, float *y, + sycl::event (*row_major_scasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_dzasum_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_sasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*row_major_dasum_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*row_major_saxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_daxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, double *y, + const std::vector& dependencies); + sycl::event (*row_major_daxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_caxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, std::complex *y, + const std::vector& dependencies); + sycl::event (*row_major_caxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, std::complex* y, 
std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zaxpy_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zaxpy_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_saxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, float *alpha, const float **x, std::int64_t *incx, - float **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, float* alpha, const float** x, std::int64_t* incx, + float** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_daxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, double *alpha, const double **x, std::int64_t *incx, - double **y, std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, double* alpha, const double** x, std::int64_t* incx, + double** y, std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_caxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event 
(*row_major_zaxpy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, std::complex *alpha, - const std::complex **x, std::int64_t *incx, std::complex **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, std::complex* alpha, + const std::complex** x, std::int64_t* incx, std::complex** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_saxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, float alpha, const float *x, std::int64_t incx, - std::int64_t stridex, float *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, float alpha, const float* x, std::int64_t incx, + std::int64_t stridex, float* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_daxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, double alpha, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, double alpha, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_caxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, const std::complex *x, - std::int64_t incx, std::int64_t stridex, std::complex *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, std::complex alpha, const std::complex* x, + std::int64_t incx, std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& 
dependencies); sycl::event (*row_major_zaxpy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_saxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - const float *x, std::int64_t incx, const float beta, - float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_daxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - const double *x, std::int64_t incx, const double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_caxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::complex alpha, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event (*row_major_saxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + const float* x, std::int64_t incx, const float beta, + float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_daxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + const double* x, std::int64_t incx, const double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_caxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex beta, std::complex *y, + const std::complex* x, std::int64_t incx, + const std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zaxpby_usm_sycl)(sycl::queue &queue, std::int64_t n, + const std::vector& dependencies); + sycl::event 
(*row_major_zaxpby_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, + const std::complex* x, std::int64_t incx, const std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_scopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_ccopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zcopy_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_scopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_ccopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_zcopy_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_scopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const float **x, std::int64_t *incx, float **y, - 
std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const float** x, std::int64_t* incx, float** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const double **x, std::int64_t *incx, double **y, - std::int64_t *incy, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const double** x, std::int64_t* incx, double** y, + std::int64_t* incy, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ccopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zcopy_batch_group_usm_sycl)( - sycl::queue &queue, std::int64_t *n, const std::complex **x, std::int64_t *incx, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t* n, const std::complex** x, std::int64_t* incx, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_scopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const float *x, std::int64_t incx, std::int64_t stridex, - float *y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, 
std::int64_t n, const float* x, std::int64_t incx, std::int64_t stridex, + float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_dcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const double *x, std::int64_t incx, - std::int64_t stridex, double *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const double* x, std::int64_t incx, + std::int64_t stridex, double* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ccopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zcopy_batch_strided_usm_sycl)( - sycl::queue &queue, std::int64_t n, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*row_major_sdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_ddot_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_dsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, 
- double *result, - const std::vector &dependencies); - sycl::event (*row_major_cdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_zdotc_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_cdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_zdotu_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *result, - const std::vector &dependencies); - sycl::event (*row_major_isamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_idamin_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_icamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_izamin_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_isamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_idamax_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, std::int64_t *result, - const 
std::vector &dependencies); - sycl::event (*row_major_icamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_izamax_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - std::int64_t *result, - const std::vector &dependencies); - sycl::event (*row_major_scnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - float *result, - const std::vector &dependencies); - sycl::event (*row_major_dznrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, - const std::complex *x, std::int64_t incx, - double *result, - const std::vector &dependencies); - sycl::event (*row_major_snrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const float *x, - std::int64_t incx, float *result, - const std::vector &dependencies); - sycl::event (*row_major_dnrm2_usm_sycl)(sycl::queue &queue, std::int64_t n, const double *x, - std::int64_t incx, double *result, - const std::vector &dependencies); - sycl::event (*row_major_srot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, float c, - float s, const std::vector &dependencies); - sycl::event (*row_major_drot_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, double c, - double s, const std::vector &dependencies); - sycl::event (*row_major_csrot_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, float c, - float s, const std::vector &dependencies); - sycl::event (*row_major_zdrot_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, + sycl::queue& queue, std::int64_t n, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex* y, std::int64_t incy, std::int64_t stridey, + 
std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*row_major_sdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_ddot_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_dsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_cdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_zdotc_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_cdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_zdotu_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* result, + const std::vector& dependencies); + sycl::event (*row_major_isamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_idamin_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_icamin_usm_sycl)(sycl::queue& queue, std::int64_t 
n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_izamin_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_isamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_idamax_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_icamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_izamax_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + std::int64_t* result, + const std::vector& dependencies); + sycl::event (*row_major_scnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + float* result, + const std::vector& dependencies); + sycl::event (*row_major_dznrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, + const std::complex* x, std::int64_t incx, + double* result, + const std::vector& dependencies); + sycl::event (*row_major_snrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const float* x, + std::int64_t incx, float* result, + const std::vector& dependencies); + sycl::event (*row_major_dnrm2_usm_sycl)(sycl::queue& queue, std::int64_t n, const double* x, + std::int64_t incx, double* result, + const std::vector& dependencies); + sycl::event (*row_major_srot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, float c, + float s, const std::vector& dependencies); + sycl::event (*row_major_drot_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, 
std::int64_t incx, + std::complex* y, std::int64_t incy, double c, + double s, const std::vector& dependencies); + sycl::event (*row_major_csrot_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, float c, + float s, const std::vector& dependencies); + sycl::event (*row_major_zdrot_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, double c, double s, - const std::vector &dependencies); - sycl::event (*row_major_srotg_usm_sycl)(sycl::queue &queue, float *a, float *b, float *c, - float *s, const std::vector &dependencies); - sycl::event (*row_major_drotg_usm_sycl)(sycl::queue &queue, double *a, double *b, double *c, - double *s, - const std::vector &dependencies); - sycl::event (*row_major_crotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, float *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*row_major_zrotg_usm_sycl)(sycl::queue &queue, std::complex *a, - std::complex *b, double *c, - std::complex *s, - const std::vector &dependencies); - sycl::event (*row_major_srotm_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - float *param, - const std::vector &dependencies); - sycl::event (*row_major_drotm_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - double *param, - const std::vector &dependencies); - sycl::event (*row_major_srotmg_usm_sycl)(sycl::queue &queue, float *d1, float *d2, float *x1, - float y1, float *param, - const std::vector &dependencies); - sycl::event (*row_major_drotmg_usm_sycl)(sycl::queue &queue, double *d1, double *d2, double *x1, - double y1, double *param, - const std::vector &dependencies); - sycl::event (*row_major_sscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event 
(*row_major_dscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_cscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*row_major_srotg_usm_sycl)(sycl::queue& queue, float* a, float* b, float* c, + float* s, const std::vector& dependencies); + sycl::event (*row_major_drotg_usm_sycl)(sycl::queue& queue, double* a, double* b, double* c, + double* s, + const std::vector& dependencies); + sycl::event (*row_major_crotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, float* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*row_major_zrotg_usm_sycl)(sycl::queue& queue, std::complex* a, + std::complex* b, double* c, + std::complex* s, + const std::vector& dependencies); + sycl::event (*row_major_srotm_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + float* param, + const std::vector& dependencies); + sycl::event (*row_major_drotm_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + double* param, + const std::vector& dependencies); + sycl::event (*row_major_srotmg_usm_sycl)(sycl::queue& queue, float* d1, float* d2, float* x1, + float y1, float* param, + const std::vector& dependencies); + sycl::event (*row_major_drotmg_usm_sycl)(sycl::queue& queue, double* d1, double* d2, double* x1, + double y1, double* param, + const std::vector& dependencies); + sycl::event (*row_major_sscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_cscal_usm_sycl)(sycl::queue& queue, 
std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_csscal_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex alpha, std::complex *x, + const std::vector& dependencies); + sycl::event (*row_major_csscal_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex alpha, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_zscal_usm_sycl)(sycl::queue &queue, std::int64_t n, float alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_zdscal_usm_sycl)(sycl::queue &queue, std::int64_t n, double alpha, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_sdsdot_usm_sycl)(sycl::queue &queue, std::int64_t n, float sb, - const float *x, std::int64_t incx, const float *y, - std::int64_t incy, float *result, - const std::vector &dependencies); - sycl::event (*row_major_sswap_usm_sycl)(sycl::queue &queue, std::int64_t n, float *x, - std::int64_t incx, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dswap_usm_sycl)(sycl::queue &queue, std::int64_t n, double *x, - std::int64_t incx, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zswap_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *x, std::int64_t incx, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zscal_usm_sycl)(sycl::queue& queue, std::int64_t n, float alpha, + std::complex* x, std::int64_t incx, + const std::vector& 
dependencies); + sycl::event (*row_major_zdscal_usm_sycl)(sycl::queue& queue, std::int64_t n, double alpha, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_sdsdot_usm_sycl)(sycl::queue& queue, std::int64_t n, float sb, + const float* x, std::int64_t incx, const float* y, + std::int64_t incy, float* result, + const std::vector& dependencies); + sycl::event (*row_major_sswap_usm_sycl)(sycl::queue& queue, std::int64_t n, float* x, + std::int64_t incx, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dswap_usm_sycl)(sycl::queue& queue, std::int64_t n, double* x, + std::int64_t incx, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_cswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_zswap_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* x, std::int64_t incx, + std::complex* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_sgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, float alpha, const float *a, - std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t ku, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, - std::int64_t ku, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, 
std::int64_t incy, - const std::vector &dependencies); + std::int64_t ku, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_cgbmv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t kl, std::int64_t ku, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*row_major_zgbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t kl, std::int64_t ku, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_sgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); 
- sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_dgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, const double *x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_cgemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_zgemv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); sycl::event (*row_major_sgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stridea, const float *x, - std::int64_t incx, std::int64_t 
stridex, float beta, float *y, std::int64_t incy, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stridea, const float* x, + std::int64_t incx, std::int64_t stridex, float beta, float* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_dgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stridea, const double *x, - std::int64_t incx, std::int64_t stridex, double beta, double *y, std::int64_t incy, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stridea, const double* x, + std::int64_t incx, std::int64_t stridex, double beta, double* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_cgemv_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex beta, std::complex *y, std::int64_t incy, std::int64_t stridey, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zgemv_batch_strided_usm_sycl)( - sycl::queue &queue, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stridea, const std::complex *x, std::int64_t incx, - std::int64_t stridex, std::complex beta, std::complex *y, std::int64_t incy, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stridea, const std::complex* x, std::int64_t incx, + std::int64_t stridex, std::complex beta, std::complex* y, std::int64_t incy, std::int64_t stridey, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_sgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, - float *beta, float **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, + float* beta, float** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_dgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, - double *beta, double **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, + double* beta, double** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* 
group_size, const std::vector& dependencies); sycl::event (*row_major_cgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zgemv_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - const std::complex **x, std::int64_t *incx, std::complex *beta, - std::complex **y, std::int64_t *incy, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + const std::complex** x, std::int64_t* incx, std::complex* beta, + std::complex** y, std::int64_t* incy, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_sdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const float *a, std::int64_t lda, std::int64_t stridea, const float *x, std::int64_t incx, - std::int64_t stridex, float *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const 
float* a, std::int64_t lda, std::int64_t stridea, const float* x, std::int64_t incx, + std::int64_t stridex, float* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ddgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const double *a, std::int64_t lda, std::int64_t stridea, const double *x, std::int64_t incx, - std::int64_t stridex, double *c, std::int64_t ldc, std::int64_t stridec, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const double* a, std::int64_t lda, std::int64_t stridea, const double* x, std::int64_t incx, + std::int64_t stridex, double* c, std::int64_t ldc, std::int64_t stridec, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_cdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_zdgmm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, - const std::complex *a, std::int64_t lda, std::int64_t stridea, - const std::complex *x, std::int64_t incx, std::int64_t stridex, - std::complex *c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, - const 
std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side left_right, std::int64_t m, std::int64_t n, + const std::complex* a, std::int64_t lda, std::int64_t stridea, + const std::complex* x, std::int64_t incx, std::int64_t stridex, + std::complex* c, std::int64_t ldc, std::int64_t stridec, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_sdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const float **a, std::int64_t *lda, const float **x, std::int64_t *incx, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const float** a, std::int64_t* lda, const float** x, std::int64_t* incx, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ddgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const double **a, std::int64_t *lda, const double **x, std::int64_t *incx, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const double** a, std::int64_t* lda, const double** x, std::int64_t* incx, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_cdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* 
left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_zdgmm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, std::int64_t *m, std::int64_t *n, - const std::complex **a, std::int64_t *lda, const std::complex **x, - std::int64_t *incx, std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); - sycl::event (*row_major_sger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float alpha, const float *x, std::int64_t incx, - const float *y, std::int64_t incy, float *a, + sycl::queue& queue, oneapi::mkl::side* left_right, std::int64_t* m, std::int64_t* n, + const std::complex** a, std::int64_t* lda, const std::complex** x, + std::int64_t* incx, std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); + sycl::event (*row_major_sger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float alpha, const float* x, std::int64_t incx, + const float* y, std::int64_t incy, float* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dger_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double alpha, const double *x, std::int64_t incx, - const double *y, std::int64_t incy, double *a, + const std::vector& dependencies); + sycl::event (*row_major_dger_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double alpha, const double* x, std::int64_t incx, + const double* y, std::int64_t incy, double* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const 
std::complex *y, - std::int64_t incy, std::complex *a, + const std::vector& dependencies); + sycl::event (*row_major_cgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zgerc_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zgerc_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *x, - std::int64_t incx, const std::complex *y, - std::int64_t incy, std::complex *a, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_cgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* x, + std::int64_t incx, const std::complex* y, + std::int64_t incy, std::complex* a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zgeru_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + const std::vector& dependencies); + sycl::event (*row_major_zgeru_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, 
std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_chbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *x, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, - const std::vector &dependencies); + std::complex* y, std::int64_t incy, + const std::vector& dependencies); sycl::event (*row_major_zhbmv_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, std::complex beta, - std::complex *y, std::int64_t incy, const std::vector &dependencies); - sycl::event (*row_major_chemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, + std::complex alpha, const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, std::complex beta, + std::complex* y, std::int64_t incy, const std::vector& dependencies); + sycl::event (*row_major_chemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zhemv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, std::int64_t lda, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_cher_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_cher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zher_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_zher_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_cher2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_cher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_zher2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_zher2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex 
*x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_chpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zhpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *a, - const std::complex *x, std::int64_t incx, - std::complex beta, std::complex *y, + const std::complex* a, + const std::complex* x, std::int64_t incx, + std::complex beta, std::complex* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_chpr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_chpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, float alpha, - const std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_zhpr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, double alpha, - const 
std::complex *x, std::int64_t incx, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_chpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_zhpr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::complex alpha, - const std::complex *x, std::int64_t incx, - const std::complex *y, std::int64_t incy, - std::complex *a, - const std::vector &dependencies); - sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* x, std::int64_t incx, + const std::complex* y, std::int64_t incy, + std::complex* a, + const std::vector& dependencies); + sycl::event (*row_major_ssbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *x, - std::int64_t incx, float beta, float *y, + const float* a, std::int64_t lda, const float* x, + std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dsbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double 
*x, - std::int64_t incx, double beta, double *y, + const double* a, std::int64_t lda, const double* x, + std::int64_t incx, double beta, double* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - const float *x, std::int64_t incx, float beta, float *y, + const std::vector& dependencies); + sycl::event (*row_major_sspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + const float* x, std::int64_t incx, float beta, float* y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - const double *x, std::int64_t incx, double beta, - double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_sspr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, - const std::vector &dependencies); - sycl::event (*row_major_dspr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, - const std::vector &dependencies); - sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, const std::vector &dependencies); - sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *a, - const std::vector &dependencies); - sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *a, - 
std::int64_t lda, const float *x, std::int64_t incx, - float beta, float *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *x, std::int64_t incx, - double beta, double *y, std::int64_t incy, - const std::vector &dependencies); - sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, float alpha, const float *x, - std::int64_t incx, const float *y, std::int64_t incy, - float *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_dsyr2_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, - std::int64_t n, double alpha, const double *x, - std::int64_t incx, const double *y, std::int64_t incy, - double *a, std::int64_t lda, - const std::vector &dependencies); - sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dspmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + const double* x, std::int64_t incx, double beta, + double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_sspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, + const std::vector& dependencies); + sycl::event 
(*row_major_dspr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, + const std::vector& dependencies); + sycl::event (*row_major_sspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, const std::vector& dependencies); + sycl::event (*row_major_dspr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* a, + const std::vector& dependencies); + sycl::event (*row_major_ssymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* x, std::int64_t incx, + float beta, float* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_dsymv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* x, std::int64_t incx, + double beta, double* y, std::int64_t incy, + const std::vector& dependencies); + sycl::event (*row_major_ssyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_dsyr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_ssyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, float alpha, const float* x, + std::int64_t incx, const float* y, std::int64_t incy, + float* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event 
(*row_major_dsyr2_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, + std::int64_t n, double alpha, const double* x, + std::int64_t incx, const double* y, std::int64_t incy, + double* a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*row_major_stbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztbmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, 
std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_stbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const float *a, std::int64_t lda, - float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const float* a, std::int64_t lda, + float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const double *a, std::int64_t lda, - double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, const double* a, std::int64_t lda, + double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztbsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - std::int64_t k, const std::complex *a, - 
std::int64_t lda, std::complex *x, + std::int64_t k, const std::complex* a, + std::int64_t lda, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_stpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztpmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const 
std::vector& dependencies); + sycl::event (*row_major_stpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, float *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const float* a, float* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_dtpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, double *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const double* a, double* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ctpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ztpsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::complex *x, + const std::complex* a, std::complex* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_strmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_strmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, 
std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dtrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ctrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ztrmv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_strsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_strsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const float *a, std::int64_t lda, float *x, + const float* a, std::int64_t lda, float* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event 
(*row_major_dtrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dtrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const double *a, std::int64_t lda, double *x, + const double* a, std::int64_t lda, double* x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ctrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_ztrsv_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_ztrsv_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t n, - const std::complex *a, std::int64_t lda, - std::complex *x, std::int64_t incx, - const std::vector &dependencies); - sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::complex* a, std::int64_t lda, + std::complex* x, std::int64_t incx, + const std::vector& dependencies); + sycl::event (*row_major_sgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const float *a, std::int64_t lda, const float *b, - std::int64_t ldb, float beta, float *c, + const float* a, std::int64_t lda, const float* b, + std::int64_t ldb, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event 
(*row_major_dgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::vector& dependencies); + sycl::event (*row_major_dgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, double alpha, - const double *a, std::int64_t lda, const double *b, - std::int64_t ldb, double beta, double *c, + const double* a, std::int64_t lda, const double* b, + std::int64_t ldb, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::vector& dependencies); + sycl::event (*row_major_cgemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, + std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_zgemm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_hgemm_usm_sycl)(sycl::queue& queue, 
oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, - const sycl::half *a, std::int64_t lda, - const sycl::half *b, std::int64_t ldb, sycl::half beta, - sycl::half *c, std::int64_t ldc, - const std::vector &dependencies); + const sycl::half* a, std::int64_t lda, + const sycl::half* b, std::int64_t ldb, sycl::half beta, + sycl::half* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, const sycl::half *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, const sycl::half* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*row_major_gemm_bf16bf16f32_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16 *a, - std::int64_t lda, const oneapi::mkl::bfloat16 *b, std::int64_t ldb, float beta, float *c, - std::int64_t ldc, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const oneapi::mkl::bfloat16* a, + std::int64_t lda, const oneapi::mkl::bfloat16* b, std::int64_t ldb, float beta, float* c, + std::int64_t ldc, const std::vector& dependencies); sycl::event (*row_major_chemm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex 
alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zhemm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_cherk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_cherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, float alpha, - const std::complex *a, std::int64_t lda, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zherk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_zherk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, double alpha, - const std::complex *a, std::int64_t lda, - double beta, std::complex *c, std::int64_t ldc, - const std::vector 
&dependencies); - sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_cher2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - float beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + float beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_zher2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - double beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + double beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_ssymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, 
std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_csymm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zsymm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ssyrk_usm_sycl)(sycl::queue& queue, 
oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, float beta, float *c, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, float beta, float* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_dsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, double beta, double *c, + std::int64_t k, double alpha, const double* a, + std::int64_t lda, double beta, double* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_csyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_zsyrk_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_ssyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, 
oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, float *alpha, const float **a, std::int64_t *lda, - float *beta, float **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, float* alpha, const float** a, std::int64_t* lda, + float* beta, float** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_dsyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, double *alpha, const double **a, std::int64_t *lda, - double *beta, double **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, double* alpha, const double** a, std::int64_t* lda, + double* beta, double** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_csyrk_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_zsyrk_batch_group_usm_sycl)( - 
sycl::queue &queue, oneapi::mkl::uplo *upper_lower, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, std::complex *beta, - std::complex **c, std::int64_t *ldc, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* upper_lower, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, std::complex* beta, + std::complex** c, std::int64_t* ldc, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_ssyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_dsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t 
batch_size, const std::vector& dependencies); sycl::event (*row_major_csyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_zsyrk_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex *c, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + const std::vector& dependencies); + sycl::event (*row_major_ssyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + 
float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_csyr2k_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t k, std::complex alpha, const std::complex *a, - std::int64_t lda, const std::complex *b, std::int64_t ldb, std::complex beta, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t k, std::complex alpha, const std::complex* a, + std::int64_t lda, const std::complex* b, std::int64_t ldb, std::complex beta, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zsyr2k_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, - const std::complex *b, std::int64_t ldb, - std::complex beta, std::complex *c, + const std::complex* a, std::int64_t lda, + const std::complex* b, std::int64_t ldb, + std::complex beta, std::complex* c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_strmm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + const std::vector& dependencies); + sycl::event 
(*row_major_strmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_dtrmm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*row_major_ctrmm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_ztrmm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector 
&dependencies); - sycl::event (*row_major_strsm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); + sycl::event (*row_major_strsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue &queue, oneapi::mkl::side left_right, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_dtrsm_usm_sycl)(sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double *b, std::int64_t ldb, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double* b, std::int64_t ldb, + const std::vector& dependencies); sycl::event (*row_major_ctrsm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_ztrsm_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side 
left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, const std::vector& dependencies); sycl::event (*row_major_strsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_dtrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_ctrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, 
std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_ztrsm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::side left_right, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, oneapi::mkl::diag unit_diag, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_strsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, float *alpha, const float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, float* alpha, const float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dtrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - 
oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, double *alpha, const double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, double* alpha, const double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_ctrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_ztrsm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::side *left_right, oneapi::mkl::uplo *upper_lower, - oneapi::mkl::transpose *trans, oneapi::mkl::diag *unit_diag, std::int64_t *m, - std::int64_t *n, std::complex *alpha, const std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::side* left_right, oneapi::mkl::uplo* upper_lower, + oneapi::mkl::transpose* trans, oneapi::mkl::diag* 
unit_diag, std::int64_t* m, + std::int64_t* n, std::complex* alpha, const std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_size, const std::vector& dependencies); sycl::event (*row_major_sgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const float **a, - std::int64_t *lda, const float **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const float** a, + std::int64_t* lda, const float** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_dgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, double *alpha, const double **a, - std::int64_t *lda, const double **b, std::int64_t *ldb, double *beta, double **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, double* alpha, const double** a, + std::int64_t* lda, const double** b, std::int64_t* ldb, double* beta, double** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_cgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex 
*alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_zgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, std::complex *alpha, - const std::complex **a, std::int64_t *lda, const std::complex **b, - std::int64_t *ldb, std::complex *beta, std::complex **c, std::int64_t *ldc, - std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, std::complex* alpha, + const std::complex** a, std::int64_t* lda, const std::complex** b, + std::int64_t* ldb, std::complex* beta, std::complex** c, std::int64_t* ldc, + std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_hgemm_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, sycl::half *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, sycl::half *beta, - sycl::half **c, std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, 
oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, sycl::half* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, sycl::half* beta, + sycl::half** c, std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const sycl::half **a, - std::int64_t *lda, const sycl::half **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const sycl::half** a, + std::int64_t* lda, const sycl::half** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8f32_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, float **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, float** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_batch_group_usm_sycl)( - 
sycl::queue &queue, oneapi::mkl::transpose *transa, oneapi::mkl::transpose *transb, - std::int64_t *m, std::int64_t *n, std::int64_t *k, float *alpha, const std::int8_t **a, - std::int64_t *lda, const std::int8_t **b, std::int64_t *ldb, float *beta, std::int32_t **c, - std::int64_t *ldc, std::int64_t group_count, std::int64_t *group_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* transa, oneapi::mkl::transpose* transb, + std::int64_t* m, std::int64_t* n, std::int64_t* k, float* alpha, const std::int8_t** a, + std::int64_t* lda, const std::int8_t** b, std::int64_t* ldb, float* beta, std::int32_t** c, + std::int64_t* ldc, std::int64_t group_count, std::int64_t* group_size, + const std::vector& dependencies); sycl::event (*row_major_sgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float *a, - std::int64_t lda, std::int64_t stride_a, const float *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const float* a, + std::int64_t lda, std::int64_t stride_a, const float* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_dgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double *a, - std::int64_t lda, std::int64_t stride_a, const double *b, std::int64_t ldb, - std::int64_t stride_b, double beta, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t 
batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, double alpha, const double* a, + std::int64_t lda, std::int64_t stride_a, const double* b, std::int64_t ldb, + std::int64_t stride_b, double beta, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_cgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, std::int64_t stride_a, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex beta, std::complex *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + const std::complex* a, std::int64_t lda, std::int64_t stride_a, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex beta, 
std::complex* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_hgemm_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, sycl::half beta, sycl::half *c, std::int64_t ldc, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::half alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, sycl::half beta, sycl::half* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_gemm_f16f16f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half *a, - std::int64_t lda, std::int64_t stride_a, const sycl::half *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const sycl::half* a, + std::int64_t lda, std::int64_t stride_a, const sycl::half* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8f32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, 
std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, float *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, float* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t *a, - std::int64_t lda, std::int64_t stride_a, const std::int8_t *b, std::int64_t ldb, - std::int64_t stride_b, float beta, std::int32_t *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); - sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::int64_t k, float alpha, const std::int8_t* a, + std::int64_t lda, std::int64_t stride_a, const std::int8_t* b, std::int64_t ldb, + std::int64_t stride_b, float beta, std::int32_t* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); + sycl::event (*row_major_sgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, - std::int64_t k, float alpha, const float *a, - std::int64_t lda, const float *b, std::int64_t ldb, - float beta, float *c, std::int64_t ldc, - const std::vector 
&dependencies); - sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo upper_lower, + std::int64_t k, float alpha, const float* a, + std::int64_t lda, const float* b, std::int64_t ldb, + float beta, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_dgemmt_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, - std::int64_t k, double alpha, const double *a, - std::int64_t lda, const double *b, std::int64_t ldb, - double beta, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t k, double alpha, const double* a, + std::int64_t lda, const double* b, std::int64_t ldb, + double beta, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_cgemmt_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_zgemmt_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t n, std::int64_t k, std::complex alpha, - const std::complex *a, std::int64_t lda, const std::complex *b, - std::int64_t ldb, std::complex beta, std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* a, std::int64_t lda, const 
std::complex* b, + std::int64_t ldb, std::complex beta, std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_gemm_s8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_s8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::int8_t *a, std::int64_t lda, std::int8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::int8_t* a, std::int64_t lda, std::int8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_u8s8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, 
std::uint8_t ao, const std::int8_t *b, - std::int64_t ldb, std::int8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::int8_t* b, + std::int64_t ldb, std::int8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_gemm_u8u8s32_bias_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, oneapi::mkl::offset offsetc, std::int64_t m, std::int64_t n, std::int64_t k, float alpha, - const std::uint8_t *a, std::int64_t lda, std::uint8_t ao, const std::uint8_t *b, - std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t *c, std::int64_t ldc, - const std::int32_t *co, const std::vector &dependencies); + const std::uint8_t* a, std::int64_t lda, std::uint8_t ao, const std::uint8_t* b, + std::int64_t ldb, std::uint8_t bo, float beta, std::int32_t* c, std::int64_t ldc, + const std::int32_t* co, const std::vector& dependencies); sycl::event (*row_major_somatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, const float *a, std::int64_t lda, std::int64_t stride_a, float *b, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, const float* a, std::int64_t lda, std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_domatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, const double *a, std::int64_t lda, std::int64_t stride_a, double *b, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, 
std::int64_t n, + double alpha, const double* a, std::int64_t lda, std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*row_major_comatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zomatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, const std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, const std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_simatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - float alpha, float *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + float alpha, float* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t 
batch_size, const std::vector& dependencies); sycl::event (*row_major_dimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - double alpha, double *ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + double alpha, double* ab, std::int64_t lda, std::int64_t ldb, std::int64_t stride, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_cimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_zimatcopy_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, std::int64_t lda, std::int64_t ldb, - std::int64_t stride, std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, + std::int64_t stride, std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_somatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, float alpha, const float *a, std::int64_t lda, - std::int64_t stride_a, float beta, const float *b, std::int64_t ldb, std::int64_t stride_b, - float *c, std::int64_t ldc, 
std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, float alpha, const float* a, std::int64_t lda, + std::int64_t stride_a, float beta, const float* b, std::int64_t ldb, std::int64_t stride_b, + float* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_domatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, double alpha, const double *a, std::int64_t lda, - std::int64_t stride_a, double beta, const double *b, std::int64_t ldb, - std::int64_t stride_b, double *c, std::int64_t ldc, std::int64_t stride_c, - std::int64_t batch_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, double alpha, const double* a, std::int64_t lda, + std::int64_t stride_a, double beta, const double* b, std::int64_t ldb, + std::int64_t stride_b, double* c, std::int64_t ldc, std::int64_t stride_c, + std::int64_t batch_size, const std::vector& dependencies); sycl::event (*row_major_comatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t 
ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); sycl::event (*row_major_zomatadd_batch_strided_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, std::int64_t lda, std::int64_t stride_a, std::complex beta, - const std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::complex *c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, - const std::vector &dependencies); - sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::complex* c, std::int64_t ldc, std::int64_t stride_c, std::int64_t batch_size, + const std::vector& dependencies); + sycl::event (*row_major_somatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, float *b, + const float* a, std::int64_t lda, float* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_domatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, double *b, + const double* a, std::int64_t lda, double* b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_comatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const 
std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_zomatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_somatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - const float *a, std::int64_t lda, - std::int64_t stridea, float *b, std::int64_t ldb, + const float* a, std::int64_t lda, + std::int64_t stridea, float* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_domatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - const double *a, std::int64_t lda, - std::int64_t stridea, double *b, std::int64_t ldb, + const double* a, std::int64_t lda, + std::int64_t stridea, double* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_comatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, 
std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zomatcopy2_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, - std::int64_t stridea, std::complex *b, + const std::complex* a, std::int64_t lda, + std::int64_t stridea, std::complex* b, std::int64_t ldb, std::int64_t strideb, - const std::vector &dependencies); - sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_simatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float alpha, - float *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + float* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_dimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double alpha, - double *ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + double* ab, std::int64_t lda, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*row_major_cimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex alpha, std::complex *ab, + std::complex alpha, std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event 
(*row_major_zimatcopy_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + const std::vector& dependencies); + sycl::event (*row_major_zimatcopy_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::complex alpha, - std::complex *ab, std::int64_t lda, + std::complex* ab, std::int64_t lda, std::int64_t ldb, - const std::vector &dependencies); - sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + const std::vector& dependencies); + sycl::event (*row_major_somatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, float alpha, const float *a, - std::int64_t lda, float beta, const float *b, - std::int64_t ldb, float *c, std::int64_t ldc, - const std::vector &dependencies); - sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + std::int64_t n, float alpha, const float* a, + std::int64_t lda, float beta, const float* b, + std::int64_t ldb, float* c, std::int64_t ldc, + const std::vector& dependencies); + sycl::event (*row_major_domatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, - std::int64_t n, double alpha, const double *a, - std::int64_t lda, double beta, const double *b, - std::int64_t ldb, double *c, std::int64_t ldc, - const std::vector &dependencies); + std::int64_t n, double alpha, const double* a, + std::int64_t lda, double beta, const double* b, + std::int64_t ldb, double* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_comatadd_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, - std::int64_t m, std::int64_t n, std::complex alpha, const std::complex *a, - std::int64_t lda, std::complex beta, const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, const std::vector &dependencies); - 
sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose transa, + sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, + std::int64_t m, std::int64_t n, std::complex alpha, const std::complex* a, + std::int64_t lda, std::complex beta, const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, const std::vector& dependencies); + sycl::event (*row_major_zomatadd_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, std::complex alpha, - const std::complex *a, std::int64_t lda, + const std::complex* a, std::int64_t lda, std::complex beta, - const std::complex *b, std::int64_t ldb, - std::complex *c, std::int64_t ldc, - const std::vector &dependencies); + const std::complex* b, std::int64_t ldb, + std::complex* c, std::int64_t ldc, + const std::vector& dependencies); sycl::event (*row_major_somatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, const float **a, std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, const float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_domatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double *alpha, const double **a, std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, const double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t 
group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_comatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_zomatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, const std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, const std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_simatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - float *alpha, float **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + float* alpha, float** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_dimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - double 
*alpha, double **ab, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *groupsize, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + double* alpha, double** ab, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* groupsize, const std::vector& dependencies); sycl::event (*row_major_cimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); sycl::event (*row_major_zimatcopy_batch_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *m, std::int64_t *n, - std::complex *alpha, std::complex **ab, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *groupsize, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* m, std::int64_t* n, + std::complex* alpha, std::complex** ab, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* groupsize, + const std::vector& dependencies); } blas_function_table_t; diff --git a/src/dft/backends/backend_backward_instantiations.cxx b/src/dft/backends/backend_backward_instantiations.cxx index a6aeaf71b..e4d960afb 100644 --- a/src/dft/backends/backend_backward_instantiations.cxx +++ b/src/dft/backends/backend_backward_instantiations.cxx @@ -25,29 +25,29 @@ using desc_rd_t = dft::detail::descriptor; using desc_cd_t = dft::detail::descriptor; -using depends_vec_t = const std::vector &; +using depends_vec_t = 
const std::vector&; -#define ONEMKL_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ - /* Buffer API */ \ - template ONEMKL_EXPORT void compute_backward(DESCRIPTOR_T &, \ - sycl::buffer &); \ - template ONEMKL_EXPORT void compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMKL_EXPORT void compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMKL_EXPORT void compute_backward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &, \ - sycl::buffer &, sycl::buffer &); \ - \ - /* USM API */ \ - template ONEMKL_EXPORT sycl::event compute_backward(DESCRIPTOR_T &, FORWARD_T *, \ - depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_backward(DESCRIPTOR_T &, SCALAR_T *, \ - SCALAR_T *, depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_backward( \ - DESCRIPTOR_T &, BACKWARD_T *, FORWARD_T *, depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_backward( \ - DESCRIPTOR_T &, SCALAR_T *, SCALAR_T *, SCALAR_T *, SCALAR_T *, depends_vec_t); +#define ONEMKL_DFT_BACKWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ + /* Buffer API */ \ + template ONEMKL_EXPORT void compute_backward(DESCRIPTOR_T&, \ + sycl::buffer&); \ + template ONEMKL_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMKL_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMKL_EXPORT void compute_backward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&, sycl::buffer&, \ + sycl::buffer&); \ + \ + /* USM API */ \ + template ONEMKL_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, FORWARD_T*, \ + depends_vec_t); \ + template ONEMKL_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, SCALAR_T*, \ + SCALAR_T*, depends_vec_t); \ + template ONEMKL_EXPORT sycl::event compute_backward(DESCRIPTOR_T&, BACKWARD_T*, \ + FORWARD_T*, depends_vec_t); \ + template ONEMKL_EXPORT sycl::event 
compute_backward( \ + DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t); ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex) ONEMKL_DFT_BACKWARD_INSTANTIATIONS(desc_cf_t, float, std::complex, std::complex) diff --git a/src/dft/backends/backend_forward_instantiations.cxx b/src/dft/backends/backend_forward_instantiations.cxx index a6ed371d5..b23a5ca40 100644 --- a/src/dft/backends/backend_forward_instantiations.cxx +++ b/src/dft/backends/backend_forward_instantiations.cxx @@ -25,29 +25,29 @@ using desc_rd_t = dft::detail::descriptor; using desc_cd_t = dft::detail::descriptor; -using depends_vec_t = const std::vector &; +using depends_vec_t = const std::vector&; -#define ONEMKL_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ - /* Buffer API */ \ - template ONEMKL_EXPORT void compute_forward(DESCRIPTOR_T &, \ - sycl::buffer &); \ - template ONEMKL_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMKL_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &); \ - template ONEMKL_EXPORT void compute_forward( \ - DESCRIPTOR_T &, sycl::buffer &, sycl::buffer &, \ - sycl::buffer &, sycl::buffer &); \ - \ - /* USM API */ \ - template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, FORWARD_T *, \ - depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, SCALAR_T *, \ - SCALAR_T *, depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T &, FORWARD_T *, \ - BACKWARD_T *, depends_vec_t); \ - template ONEMKL_EXPORT sycl::event compute_forward( \ - DESCRIPTOR_T &, SCALAR_T *, SCALAR_T *, SCALAR_T *, SCALAR_T *, depends_vec_t); +#define ONEMKL_DFT_FORWARD_INSTANTIATIONS(DESCRIPTOR_T, SCALAR_T, FORWARD_T, BACKWARD_T) \ + /* Buffer API */ \ + template ONEMKL_EXPORT void compute_forward(DESCRIPTOR_T&, \ + sycl::buffer&); \ + template ONEMKL_EXPORT void compute_forward( \ + 
DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMKL_EXPORT void compute_forward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&); \ + template ONEMKL_EXPORT void compute_forward( \ + DESCRIPTOR_T&, sycl::buffer&, sycl::buffer&, sycl::buffer&, \ + sycl::buffer&); \ + \ + /* USM API */ \ + template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, FORWARD_T*, \ + depends_vec_t); \ + template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, SCALAR_T*, \ + SCALAR_T*, depends_vec_t); \ + template ONEMKL_EXPORT sycl::event compute_forward(DESCRIPTOR_T&, FORWARD_T*, \ + BACKWARD_T*, depends_vec_t); \ + template ONEMKL_EXPORT sycl::event compute_forward( \ + DESCRIPTOR_T&, SCALAR_T*, SCALAR_T*, SCALAR_T*, SCALAR_T*, depends_vec_t); ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_rf_t, float, float, std::complex) ONEMKL_DFT_FORWARD_INSTANTIATIONS(desc_cf_t, float, std::complex, std::complex) diff --git a/src/dft/backends/cufft/backward.cpp b/src/dft/backends/cufft/backward.cpp index 80e475991..4c0f76a70 100644 --- a/src/dft/backends/cufft/backward.cpp +++ b/src/dft/backends/cufft/backward.cpp @@ -38,19 +38,19 @@ namespace oneapi::mkl::dft::cufft { namespace detail { //forward declaration template -std::array get_offsets_bwd(dft::detail::commit_impl *commit); +std::array get_offsets_bwd(dft::detail::commit_impl* commit); template -cufftHandle get_bwd_plan(dft::detail::commit_impl *commit) { - return static_cast *>(commit->get_handle())[1].value(); +cufftHandle get_bwd_plan(dft::detail::commit_impl* commit) { + return static_cast*>(commit->get_handle())[1].value(); } } // namespace detail // BUFFER version //In-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_backward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -68,35 +68,35 @@ 
ONEMKL_EXPORT void compute_backward(descriptor_type &desc, } } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto inout_native = reinterpret_cast *>( + auto inout_native = reinterpret_cast*>( ih.get_native_mem(inout_acc)); detail::cufft_execute>( - func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), - reinterpret_cast(inout_native + offsets[1])); + func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), + reinterpret_cast(inout_native + offsets[1])); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMKL_EXPORT void compute_backward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im)", "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { const std::string func_name = "compute_backward(desc, in, out)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -113,7 +113,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, } } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -121,12 +121,12 @@ ONEMKL_EXPORT void 
compute_backward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto in_native = reinterpret_cast( - reinterpret_cast *>( + auto in_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>( + auto out_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(out_acc)) + offsets[1]); detail::cufft_execute>( @@ -137,10 +137,10 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMKL_EXPORT void compute_backward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, in_re, in_im, out_re, out_im)", "cuFFT does not support real-real complex storage."); } @@ -149,8 +149,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &, sycl::buffer -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { const std::string func_name = "compute_backward(desc, inout, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -168,7 +168,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -185,9 +185,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &, scalar *, - scalar *, - const std::vector &) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type&, 
scalar*, + scalar*, + const std::vector&) { throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, inout_re, inout_im, dependencies)", "cuFFT does not support real-real complex storage."); @@ -195,9 +195,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &, scalar -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies) { const std::string func_name = "compute_backward(desc, in, out, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -214,7 +214,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -231,10 +231,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &, scalar *, - scalar *, scalar *, - scalar *, - const std::vector &) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type&, scalar*, + scalar*, scalar*, + scalar*, + const std::vector&) { throw oneapi::mkl::unimplemented("DFT", "compute_backward(desc, in_re, in_im, out_re, out_im, deps)", "cuFFT does not support real-real complex storage."); diff --git a/src/dft/backends/cufft/execute_helper.hpp b/src/dft/backends/cufft/execute_helper.hpp index 7b7d946db..88c2e3dba 100644 --- a/src/dft/backends/cufft/execute_helper.hpp +++ b/src/dft/backends/cufft/execute_helper.hpp @@ -37,8 +37,8 @@ namespace oneapi::mkl::dft::cufft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::cufft) { throw mkl::invalid_argument("dft/backends/cufft", "get_commit", 
@@ -50,7 +50,7 @@ inline dft::detail::commit_impl *checked_get_commit( /// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -61,8 +61,8 @@ inline auto expect_config(DescT &desc, const char *message) { enum class Direction { Forward = CUFFT_FORWARD, Backward = CUFFT_INVERSE }; template -void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, void *input, - void *output) { +void cufft_execute(const std::string& func, CUstream stream, cufftHandle plan, void* input, + void* output) { constexpr bool is_real = std::is_floating_point_v; using single_type = std::conditional_t>; constexpr bool is_single = std::is_same_v; @@ -70,16 +70,16 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v if constexpr (is_real) { if constexpr (dir == Direction::Forward) { if constexpr (is_single) { - auto result = cufftExecR2C(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecR2C(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecR2C returned " + std::to_string(result)); } } else { - auto result = cufftExecD2Z(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecD2Z(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecD2Z returned " + std::to_string(result)); @@ -88,16 +88,16 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v } else { if constexpr (is_single) { - auto result = cufftExecC2R(plan, reinterpret_cast(input), - reinterpret_cast(output)); 
+ auto result = cufftExecC2R(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecC2R returned " + std::to_string(result)); } } else { - auto result = cufftExecZ2D(plan, reinterpret_cast(input), - reinterpret_cast(output)); + auto result = cufftExecZ2D(plan, reinterpret_cast(input), + reinterpret_cast(output)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecZ2D returned " + std::to_string(result)); @@ -108,8 +108,8 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v else { if constexpr (is_single) { auto result = - cufftExecC2C(plan, reinterpret_cast(input), - reinterpret_cast(output), static_cast(dir)); + cufftExecC2C(plan, reinterpret_cast(input), + reinterpret_cast(output), static_cast(dir)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecC2C returned " + std::to_string(result)); @@ -117,8 +117,8 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v } else { auto result = - cufftExecZ2Z(plan, reinterpret_cast(input), - reinterpret_cast(output), static_cast(dir)); + cufftExecZ2Z(plan, reinterpret_cast(input), + reinterpret_cast(output), static_cast(dir)); if (result != CUFFT_SUCCESS) { throw oneapi::mkl::exception("dft/backends/cufft", func, "cufftExecZ2Z returned " + std::to_string(result)); @@ -137,7 +137,7 @@ void cufft_execute(const std::string &func, CUstream stream, cufftHandle plan, v #endif } -inline CUstream setup_stream(const std::string &func, sycl::interop_handle ih, cufftHandle plan) { +inline CUstream setup_stream(const std::string& func, sycl::interop_handle ih, cufftHandle plan) { auto stream = ih.get_native_queue(); auto result = cufftSetStream(plan, stream); if (result != CUFFT_SUCCESS) { diff --git a/src/dft/backends/cufft/forward.cpp b/src/dft/backends/cufft/forward.cpp index 
7cf73976d..0c003b844 100644 --- a/src/dft/backends/cufft/forward.cpp +++ b/src/dft/backends/cufft/forward.cpp @@ -40,11 +40,11 @@ namespace oneapi::mkl::dft::cufft { namespace detail { //forward declaration template -std::array get_offsets_fwd(dft::detail::commit_impl *commit); +std::array get_offsets_fwd(dft::detail::commit_impl* commit); template -cufftHandle get_fwd_plan(dft::detail::commit_impl *commit) { - return static_cast *>(commit->get_handle())[0].value(); +cufftHandle get_fwd_plan(dft::detail::commit_impl* commit) { + return static_cast*>(commit->get_handle())[0].value(); } } // namespace detail @@ -52,8 +52,8 @@ cufftHandle get_fwd_plan(dft::detail::commit_impl *commit) { //In-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_forward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -71,34 +71,34 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, offsets[1] *= 2; // offset is supplied in complex but we offset scalar pointer } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, plan); - auto inout_native = reinterpret_cast *>( + auto inout_native = reinterpret_cast*>( ih.get_native_mem(inout_acc)); detail::cufft_execute>( - func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), - reinterpret_cast(inout_native + offsets[1])); + func_name, stream, plan, reinterpret_cast(inout_native + offsets[0]), + reinterpret_cast(inout_native + offsets[1])); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format 
template -ONEMKL_EXPORT void compute_forward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMKL_EXPORT void compute_forward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im)", "cuFFT does not support real-real complex storage."); } //Out-of-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { const std::string func_name = "compute_forward(desc, in, out)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -115,7 +115,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -123,12 +123,12 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer( - reinterpret_cast *>( + auto in_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>( + auto out_native = reinterpret_cast( + reinterpret_cast*>( ih.get_native_mem(out_acc)) + offsets[1]); detail::cufft_execute>( @@ -139,10 +139,10 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMKL_EXPORT void compute_forward(descriptor_type &, sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &, - sycl::buffer, 1> &) { +ONEMKL_EXPORT void compute_forward(descriptor_type&, sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&, + sycl::buffer, 1>&) { throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, in_re, in_im, out_re, out_im)", "cuFFT does not support real-real complex storage."); } @@ -151,8 +151,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &, sycl::buffer -ONEMKL_EXPORT sycl::event 
compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { const std::string func_name = "compute_forward(desc, inout, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -170,7 +170,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -187,9 +187,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &, scalar *, - scalar *, - const std::vector &) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type&, scalar*, + scalar*, + const std::vector&) { throw oneapi::mkl::unimplemented("DFT", "compute_forward(desc, inout_re, inout_im, dependencies)", "cuFFT does not support real-real complex storage."); @@ -197,9 +197,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &, scalar -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { const std::string func_name = "compute_forward(desc, in, out, dependencies)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -216,7 +216,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -233,10 +233,10 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &, scalar *, - scalar *, scalar *, - scalar *, - const std::vector &) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type&, scalar*, + scalar*, scalar*, + scalar*, + const std::vector&) { throw oneapi::mkl::unimplemented( "DFT", "compute_forward(desc, in_re, in_im, out_re, out_im, 
dependencies)", "cuFFT does not support real-real complex storage."); diff --git a/src/dft/backends/descriptor.cpp b/src/dft/backends/descriptor.cpp index c6f6884f8..5c3e163ca 100644 --- a/src/dft/backends/descriptor.cpp +++ b/src/dft/backends/descriptor.cpp @@ -25,7 +25,7 @@ namespace oneapi::mkl::dft::detail { template -void descriptor::commit(sycl::queue &queue) { +void descriptor::commit(sycl::queue& queue) { if (!pimpl_ || pimpl_->get_queue() != queue) { if (pimpl_) { pimpl_->get_queue().wait(); @@ -34,9 +34,9 @@ void descriptor::commit(sycl::queue &queue) { } pimpl_->commit(values_); } -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); -template void descriptor::commit(sycl::queue &); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); +template void descriptor::commit(sycl::queue&); } //namespace oneapi::mkl::dft::detail diff --git a/src/dft/backends/mklcpu/backward.cpp b/src/dft/backends/mklcpu/backward.cpp index fe7186630..fe94691bc 100644 --- a/src/dft/backends/mklcpu/backward.cpp +++ b/src/dft/backends/mklcpu/backward.cpp @@ -40,14 +40,14 @@ namespace detail { // BUFFER version // backward a MKLCPU DFT call to the backend, checking that the commit impl is valid. 
template -inline void check_bwd_commit(dft::descriptor &desc) { +inline void check_bwd_commit(dft::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) { throw mkl::invalid_argument("DFT", "computer_backward", "DFT descriptor has not been commited for MKLCPU"); } - auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); MKL_LONG commit_status{ DFTI_UNCOMMITTED }; DftiGetValue(mklcpu_desc[1], DFTI_COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { @@ -59,7 +59,7 @@ inline void check_bwd_commit(dft::descriptor &desc) { // Throw an mkl::invalid_argument if the runtime param in the descriptor does not match // the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -68,26 +68,26 @@ inline auto expect_config(DescT &desc, const char *message) { } // convert the base commit class to derived cpu commit class template -auto get_buffer(commit_t *commit_handle) { - commit_derived_t *derived_commit = - static_cast *>(commit_handle); +auto get_buffer(commit_t* commit_handle) { + commit_derived_t* derived_commit = + static_cast*>(commit_handle); return derived_commit->get_handle_buffer(); } } // namespace detail //In-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto 
mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inout_acc = inout.template get_access(cgh); detail::host_task(cgh, [=]() { @@ -104,20 +104,20 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto re_acc = inout_re.template get_access(cgh); auto im_acc = inout_im.template get_access(cgh); @@ -136,26 +136,26 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + 
cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); detail::host_task(cgh, [=]() { - auto in_ptr = const_cast *>(detail::acc_to_ptr(in_acc)); + auto in_ptr = const_cast*>(detail::acc_to_ptr(in_acc)); DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], in_ptr, detail::acc_to_ptr(out_acc)); if (status != DFTI_NO_ERROR) { @@ -169,22 +169,22 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inre_acc = in_re.template get_access(cgh); auto inim_acc = in_im.template get_access(cgh); @@ -192,8 +192,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, auto outim_acc = out_im.template get_access(cgh); detail::host_task(cgh, [=]() { - auto inre_ptr = const_cast *>(detail::acc_to_ptr(inre_acc)); - auto inim_ptr = const_cast *>(detail::acc_to_ptr(inim_acc)); + auto inre_ptr = const_cast*>(detail::acc_to_ptr(inre_acc)); + auto inim_ptr = const_cast*>(detail::acc_to_ptr(inim_acc)); 
DFT_ERROR status = DftiComputeBackward(desc_acc[detail::DIR::bwd], inre_ptr, inim_ptr, detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc)); @@ -210,18 +210,18 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); detail::host_task(cgh, [=]() { @@ -237,19 +237,19 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); detail::host_task(cgh, [=]() { @@ -265,9 +265,9 @@ 
ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies) { // Check: inplace, complex storage detail::expect_config(desc, @@ -275,10 +275,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwdget_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -295,20 +295,20 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_bwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); diff --git a/src/dft/backends/mklcpu/forward.cpp b/src/dft/backends/mklcpu/forward.cpp index 2e5e2fa88..5d90b7854 100644 --- a/src/dft/backends/mklcpu/forward.cpp +++ b/src/dft/backends/mklcpu/forward.cpp @@ -40,14 +40,14 @@ namespace 
detail { // BUFFER version // Forward a MKLCPU DFT call to the backend, checking that the commit impl is valid. template -inline void check_fwd_commit(dft::descriptor &desc) { +inline void check_fwd_commit(dft::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::mklcpu) { throw mkl::invalid_argument("DFT", "computer_forward", "DFT descriptor has not been commited for MKLCPU"); } - auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); + auto mklcpu_desc = reinterpret_cast(commit_handle->get_handle()); MKL_LONG commit_status{ DFTI_UNCOMMITTED }; DftiGetValue(mklcpu_desc[0], DFTI_COMMIT_STATUS, &commit_status); if (commit_status != DFTI_COMMITTED) { @@ -59,7 +59,7 @@ inline void check_fwd_commit(dft::descriptor &desc) { // Throw an mkl::invalid_argument if the runtime param in the descriptor does not match // the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -69,26 +69,26 @@ inline auto expect_config(DescT &desc, const char *message) { // convert the base commit class to derived cpu commit class template -auto get_buffer(commit_t *commit_handle) { - commit_derived_t *derived_commit = - static_cast *>(commit_handle); +auto get_buffer(commit_t* commit_handle) { + commit_derived_t* derived_commit = + static_cast*>(commit_handle); return derived_commit->get_handle_buffer(); } } // namespace detail //In-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue 
&cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inout_acc = inout.template get_access(cgh); detail::host_task(cgh, [=]() { @@ -105,20 +105,20 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto re_acc = inout_re.template get_access(cgh); auto im_acc = inout_im.template get_access(cgh); @@ -137,25 +137,25 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //Out-of-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto 
mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); detail::host_task(cgh, [=]() { - auto in_ptr = const_cast *>(detail::acc_to_ptr(in_acc)); + auto in_ptr = const_cast*>(detail::acc_to_ptr(in_acc)); DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], in_ptr, detail::acc_to_ptr(out_acc)); if (status != DFTI_NO_ERROR) { @@ -169,22 +169,22 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - cpu_queue.submit([&](sycl::handler &cgh) { + cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); auto inre_acc = in_re.template get_access(cgh); auto inim_acc = in_im.template get_access(cgh); @@ -192,8 +192,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, auto outim_acc = out_im.template get_access(cgh); detail::host_task(cgh, [=]() { - auto inre_ptr = const_cast *>(detail::acc_to_ptr(inre_acc)); - auto inim_ptr = const_cast *>(detail::acc_to_ptr(inim_acc)); + auto inre_ptr = const_cast*>(detail::acc_to_ptr(inre_acc)); + auto inim_ptr = const_cast*>(detail::acc_to_ptr(inim_acc)); 
DFT_ERROR status = DftiComputeForward(desc_acc[detail::DIR::fwd], inre_ptr, inim_ptr, detail::acc_to_ptr(outre_acc), detail::acc_to_ptr(outim_acc)); @@ -210,18 +210,18 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -238,20 +238,20 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -268,9 +268,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar 
-ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { // Check: inplace detail::expect_config(desc, @@ -278,11 +278,11 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwdget_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); @@ -299,22 +299,22 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for complex storage"); auto commit_handle = dft::detail::get_commit(desc); detail::check_fwd_commit(desc); - sycl::queue &cpu_queue{ commit_handle->get_queue() }; + sycl::queue& cpu_queue{ commit_handle->get_queue() }; auto mklcpu_desc_buffer{ detail::get_buffer(commit_handle) }; - return cpu_queue.submit([&](sycl::handler &cgh) { + return cpu_queue.submit([&](sycl::handler& cgh) { auto desc_acc = mklcpu_desc_buffer.template get_access(cgh); cgh.depends_on(dependencies); diff --git a/src/dft/backends/mklgpu/backward.cpp b/src/dft/backends/mklgpu/backward.cpp index 4899ed3e7..c0648fef3 100644 --- a/src/dft/backends/mklgpu/backward.cpp +++ b/src/dft/backends/mklgpu/backward.cpp @@ -44,7 +44,7 @@ namespace detail { /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid. 
/// Assumes backend descriptor values match those of the frontend. template -inline auto compute_backward(dft::detail::descriptor &desc, ArgTs &&... args) { +inline auto compute_backward(dft::detail::descriptor& desc, ArgTs&&... args) { using mklgpu_desc_t = dft::descriptor; using desc_shptr_t = std::shared_ptr; using handle_t = std::pair; @@ -53,7 +53,7 @@ inline auto compute_backward(dft::detail::descriptor &desc, ArgTs &&. throw mkl::invalid_argument("DFT", "compute_backward", "DFT descriptor has not been commited for MKLGPU"); } - auto handle = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); auto mklgpu_desc = handle->second; // Second because backward DFT. int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status); @@ -70,7 +70,7 @@ inline auto compute_backward(dft::detail::descriptor &desc, ArgTs &&. /// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match /// the expected value. 
template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -83,8 +83,8 @@ inline auto expect_config(DescT &desc, const char *message) { //In-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_backward(desc, inout); @@ -92,18 +92,18 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type & /*desc*/, - sycl::buffer, 1> & /*inout_re*/, - sycl::buffer, 1> & /*inout_im*/) { +ONEMKL_EXPORT void compute_backward(descriptor_type& /*desc*/, + sycl::buffer, 1>& /*inout_re*/, + sycl::buffer, 1>& /*inout_im*/) { throw mkl::unimplemented("DFT", "compute_backward", "MKLGPU does not support compute_backward(desc, inout_re, inout_im)."); } //Out-of-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); @@ -112,11 +112,11 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> & /*in_re*/, - sycl::buffer, 1> & /*in_im*/, - sycl::buffer, 1> & /*out_re*/, - sycl::buffer, 1> & /*out_im*/) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& /*in_re*/, 
+ sycl::buffer, 1>& /*in_im*/, + sycl::buffer, 1>& /*out_re*/, + sycl::buffer, 1>& /*out_im*/) { detail::expect_config( desc, "Unexpected value for complex storage"); @@ -129,8 +129,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_backward(desc, inout, dependencies); @@ -138,10 +138,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type & /*desc*/, - scalar * /*inout_re*/, - scalar * /*inout_im*/, - const std::vector & /*dependencies*/) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& /*desc*/, + scalar* /*inout_re*/, + scalar* /*inout_im*/, + const std::vector& /*dependencies*/) { throw mkl::unimplemented( "DFT", "compute_backward", "MKLGPU does not support compute_backward(desc, inout_re, inout_im, dependencies)."); @@ -149,9 +149,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type & /*desc*/, //Out-of-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& dependencies) { detail::expect_config(desc, "Unexpected value for placement"); @@ -160,12 +160,12 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, - scalar * /*in_re*/, - scalar * /*in_im*/, - scalar * /*out_re*/, - scalar * /*out_im*/, - const std::vector & /*dependencies*/) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, + scalar* 
/*in_re*/, + scalar* /*in_im*/, + scalar* /*out_re*/, + scalar* /*out_im*/, + const std::vector& /*dependencies*/) { detail::expect_config( desc, "Unexpected value for complex storage"); diff --git a/src/dft/backends/mklgpu/forward.cpp b/src/dft/backends/mklgpu/forward.cpp index 9dbffa081..fb526eee9 100644 --- a/src/dft/backends/mklgpu/forward.cpp +++ b/src/dft/backends/mklgpu/forward.cpp @@ -51,7 +51,7 @@ namespace detail { /// Forward a MKLGPU DFT call to the backend, checking that the commit impl is valid. /// Assumes backend descriptor values match those of the frontend. template -inline auto compute_forward(dft::detail::descriptor &desc, ArgTs &&... args) { +inline auto compute_forward(dft::detail::descriptor& desc, ArgTs&&... args) { using mklgpu_desc_t = dft::descriptor; using desc_shptr_t = std::shared_ptr; using handle_t = std::pair; @@ -60,7 +60,7 @@ inline auto compute_forward(dft::detail::descriptor &desc, ArgTs &&.. throw mkl::invalid_argument("DFT", "compute_forward", "DFT descriptor has not been commited for MKLGPU"); } - auto handle = reinterpret_cast(commit_handle->get_handle()); + auto handle = reinterpret_cast(commit_handle->get_handle()); auto mklgpu_desc = handle->first; // First because forward DFT. int commit_status{ DFTI_UNCOMMITTED }; mklgpu_desc->get_value(dft::config_param::COMMIT_STATUS, &commit_status); @@ -77,7 +77,7 @@ inline auto compute_forward(dft::detail::descriptor &desc, ArgTs &&.. /// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match /// the expected value. 
template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::detail::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -90,8 +90,8 @@ inline auto expect_config(DescT &desc, const char *message) { //In-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_forward(desc, inout); @@ -99,17 +99,17 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type & /*desc*/, - sycl::buffer, 1> & /*inout_re*/, - sycl::buffer, 1> & /*inout_im*/) { +ONEMKL_EXPORT void compute_forward(descriptor_type& /*desc*/, + sycl::buffer, 1>& /*inout_re*/, + sycl::buffer, 1>& /*inout_im*/) { throw mkl::unimplemented("DFT", "compute_forward", "MKLGPU does not support compute_forward(desc, inout_re, inout_im)."); } //Out-of-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config(desc, "Unexpected value for placement"); @@ -118,11 +118,11 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> & /*in_re*/, - sycl::buffer, 1> & /*in_im*/, - sycl::buffer, 1> & /*out_re*/, - sycl::buffer, 1> & /*out_im*/) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& /*in_re*/, + sycl::buffer, 1>& /*in_im*/, + sycl::buffer, 1>& /*out_re*/, + sycl::buffer, 1>& /*out_im*/) { 
detail::expect_config( desc, "Unexpected value for complex storage"); @@ -135,8 +135,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& dependencies) { detail::expect_config( desc, "Unexpected value for placement"); return detail::compute_forward(desc, inout, dependencies); @@ -144,10 +144,10 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type & /*desc*/, - scalar * /*inout_re*/, - scalar * /*inout_im*/, - const std::vector & /*dependencies*/) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& /*desc*/, + scalar* /*inout_re*/, + scalar* /*inout_im*/, + const std::vector& /*dependencies*/) { throw mkl::unimplemented( "DFT", "compute_forward", "MKLGPU does not support compute_forward(desc, inout_re, inout_im, dependencies)."); @@ -155,9 +155,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type & /*desc*/, //Out-of-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *in, - bwd *out, - const std::vector &dependencies) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& dependencies) { detail::expect_config(desc, "Unexpected value for placement"); @@ -166,12 +166,11 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, - scalar * /*in_re*/, - scalar * /*in_im*/, - scalar * /*out_re*/, - scalar * /*out_im*/, - const std::vector & /*dependencies*/) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* /*in_re*/, + scalar* /*in_im*/, + scalar* /*out_re*/, + scalar* /*out_im*/, + const std::vector& /*dependencies*/) { 
detail::expect_config( desc, "Unexpected value for complex storage"); diff --git a/src/dft/backends/portfft/portfft_helper.hpp b/src/dft/backends/portfft/portfft_helper.hpp index 373865f49..010f2a5e6 100644 --- a/src/dft/backends/portfft/portfft_helper.hpp +++ b/src/dft/backends/portfft/portfft_helper.hpp @@ -31,8 +31,8 @@ namespace pfft = portfft; namespace oneapi::mkl::dft::portfft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::portfft) { throw mkl::invalid_argument("dft/backends/portfft", "get_commit", @@ -53,9 +53,9 @@ using storage_type = detail::to_pfft_domain::type::value>>; template -auto get_descriptors(descriptor_type &desc) { +auto get_descriptors(descriptor_type& desc) { auto commit = detail::checked_get_commit(desc); - return reinterpret_cast *>(commit->get_handle()); + return reinterpret_cast*>(commit->get_handle()); } } // namespace oneapi::mkl::dft::portfft::detail diff --git a/src/dft/backends/rocfft/backward.cpp b/src/dft/backends/rocfft/backward.cpp index e76437ee2..d6973bfb1 100644 --- a/src/dft/backends/rocfft/backward.cpp +++ b/src/dft/backends/rocfft/backward.cpp @@ -39,24 +39,24 @@ namespace oneapi::mkl::dft::rocfft { namespace detail { //forward declaration template -std::array get_offsets_bwd(dft::detail::commit_impl *commit); +std::array get_offsets_bwd(dft::detail::commit_impl* commit); template -rocfft_plan get_bwd_plan(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[1].plan.value(); +rocfft_plan get_bwd_plan(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[1].plan.value(); } template -rocfft_execution_info get_bwd_info(dft::detail::commit_impl *commit) { - return 
static_cast(commit->get_handle())[1].info.value(); +rocfft_execution_info get_bwd_info(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[1].info.value(); } } // namespace detail // BUFFER version //In-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_backward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -75,26 +75,26 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - auto inout_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, inout_acc)) + + auto inout_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, inout_acc)) + offsets[0]); - detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); + detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { const std::string func_name = "compute_backward(desc, inout_re, inout_im)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -108,7 +108,7 @@ ONEMKL_EXPORT 
void compute_backward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_re_acc = inout_re.template get_access(cgh); auto inout_im_acc = inout_im.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -116,24 +116,24 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_re_acc)) + - offsets[0]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_im_acc)) + - offsets[0]) + std::array inout_native{ + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_re_acc)) + + offsets[0]), + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_im_acc)) + + offsets[0]) }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); } //Out-of-place transform template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -142,7 +142,7 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_acc = in.template get_access(cgh); auto out_acc = out.template get_access(cgh); 
commit->add_buffer_workspace_dependency_if_rqd("compute_backward", cgh); @@ -151,31 +151,31 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, const std::string func_name = "compute_backward(desc, in, out)"; auto stream = detail::setup_stream(func_name, ih, info); - auto in_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_acc)) + + auto in_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, out_acc)) + + auto out_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_acc)) + offsets[1]); - detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); + detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); }); }); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_backward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMKL_EXPORT void compute_backward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_bwd_plan(commit); auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_re_acc = in_re.template get_access(cgh); auto in_im_acc = in_im.template get_access(cgh); auto out_re_acc = out_re.template get_access(cgh); @@ -186,23 +186,24 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, const std::string func_name = "compute_backward(desc, in_re, in_im, out_re, out_im)"; auto stream = detail::setup_stream(func_name, ih, info); - std::array in_native{ - 
reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_re_acc)) + + std::array in_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_re_acc)) + offsets[0]), - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_im_acc)) + + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_im_acc)) + offsets[0]) }; - std::array out_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_re_acc)) + - offsets[1]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_im_acc)) + - offsets[1]) + std::array out_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_re_acc)) + + offsets[1]), + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_im_acc)) + + offsets[1]) }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); } @@ -211,8 +212,8 @@ ONEMKL_EXPORT void compute_backward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd *inout, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, fwd* inout, + const std::vector& deps) { const std::string func_name = "compute_backward(desc, inout, deps)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -232,15 +233,15 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - void *inout_ptr = inout; - detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); + void* inout_ptr = inout; + detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ 
-249,9 +250,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& deps) { const std::string func_name = "compute_backward(desc, inout_re, inout_im, deps)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -265,16 +266,15 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalardepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); - + std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -283,9 +283,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar -ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd *in, - fwd *out, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, bwd* in, + fwd* out, + const std::vector& deps) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -297,7 +297,7 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -305,9 +305,9 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwdset_last_usm_workspace_event_if_rqd(sycl_event); @@ -316,18 +316,18 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, bwd -ONEMKL_EXPORT 
sycl::event compute_backward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_backward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& deps) { auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_bwd_plan(commit); auto info = detail::get_bwd_info(commit); auto offsets = detail::get_offsets_bwd(commit); - sycl::event sycl_event = queue.submit([&](sycl::handler &cgh) { + sycl::event sycl_event = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(deps); commit->depend_on_last_usm_workspace_event_if_rqd(cgh); @@ -336,9 +336,10 @@ ONEMKL_EXPORT sycl::event compute_backward(descriptor_type &desc, scalar in_native{ in_re + offsets[0], in_im + offsets[0] }; - std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + std::array in_native{ in_re + offsets[0], in_im + offsets[0] }; + std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); diff --git a/src/dft/backends/rocfft/execute_helper.hpp b/src/dft/backends/rocfft/execute_helper.hpp index 78663a090..626b46a4c 100644 --- a/src/dft/backends/rocfft/execute_helper.hpp +++ b/src/dft/backends/rocfft/execute_helper.hpp @@ -37,8 +37,8 @@ namespace oneapi::mkl::dft::rocfft::detail { template -inline dft::detail::commit_impl *checked_get_commit( - dft::detail::descriptor &desc) { +inline dft::detail::commit_impl* checked_get_commit( + dft::detail::descriptor& desc) { auto commit_handle = dft::detail::get_commit(desc); if (commit_handle == nullptr || commit_handle->get_backend() != backend::rocfft) { throw 
mkl::invalid_argument("dft/backends/rocfft", "get_commit", @@ -50,7 +50,7 @@ inline dft::detail::commit_impl *checked_get_commit( /// Throw an mkl::invalid_argument if the runtime param in the descriptor does not match /// the expected value. template -inline auto expect_config(DescT &desc, const char *message) { +inline auto expect_config(DescT& desc, const char* message) { dft::config_value actual{ 0 }; desc.get_value(Param, &actual); if (actual != Expected) { @@ -59,11 +59,11 @@ inline auto expect_config(DescT &desc, const char *message) { } template -inline void *native_mem(sycl::interop_handle &ih, Acc &buf) { +inline void* native_mem(sycl::interop_handle& ih, Acc& buf) { return ih.get_native_mem(buf); } -inline hipStream_t setup_stream(const std::string &func, sycl::interop_handle &ih, +inline hipStream_t setup_stream(const std::string& func, sycl::interop_handle& ih, rocfft_execution_info info) { auto stream = ih.get_native_queue(); auto result = rocfft_execution_info_set_stream(info, stream); @@ -75,16 +75,16 @@ inline hipStream_t setup_stream(const std::string &func, sycl::interop_handle &i return stream; } -inline void sync_checked(const std::string &func, hipStream_t stream) { - auto result = hipStreamSynchronize(stream); - if (result != hipSuccess) { - throw oneapi::mkl::exception("dft/backends/rocfft", func, - "hipStreamSynchronize returned " + std::to_string(result)); - } +inline void sync_checked(const std::string& func, hipStream_t stream) { + auto result = hipStreamSynchronize(stream); + if (result != hipSuccess) { + throw oneapi::mkl::exception("dft/backends/rocfft", func, + "hipStreamSynchronize returned " + std::to_string(result)); + } } -inline void execute_checked(const std::string &func, hipStream_t stream, const rocfft_plan plan, void *in_buffer[], - void *out_buffer[], rocfft_execution_info info) { +inline void execute_checked(const std::string& func, hipStream_t stream, const rocfft_plan plan, + void* in_buffer[], void* out_buffer[], 
rocfft_execution_info info) { auto result = rocfft_execute(plan, in_buffer, out_buffer, info); if (result != rocfft_status_success) { throw oneapi::mkl::exception("dft/backends/rocfft", func, diff --git a/src/dft/backends/rocfft/forward.cpp b/src/dft/backends/rocfft/forward.cpp index d9a576720..e40469fe5 100644 --- a/src/dft/backends/rocfft/forward.cpp +++ b/src/dft/backends/rocfft/forward.cpp @@ -41,16 +41,16 @@ namespace oneapi::mkl::dft::rocfft { namespace detail { //forward declaration template -std::array get_offsets_fwd(dft::detail::commit_impl *commit); +std::array get_offsets_fwd(dft::detail::commit_impl* commit); template -rocfft_plan get_fwd_plan(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[0].plan.value(); +rocfft_plan get_fwd_plan(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[0].plan.value(); } template -rocfft_execution_info get_fwd_info(dft::detail::commit_impl *commit) { - return static_cast(commit->get_handle())[0].info.value(); +rocfft_execution_info get_fwd_info(dft::detail::commit_impl* commit) { + return static_cast(commit->get_handle())[0].info.value(); } } // namespace detail @@ -58,8 +58,8 @@ rocfft_execution_info get_fwd_info(dft::detail::commit_impl *commit) //In-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout) { const std::string func_name = "compute_forward(desc, inout)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -78,26 +78,26 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_acc = inout.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); 
dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - auto inout_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, inout_acc)) + + auto inout_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, inout_acc)) + offsets[0]); - detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); + detail::execute_checked(func_name, stream, plan, &inout_native, nullptr, info); }); }); } //In-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &inout_re, - sycl::buffer, 1> &inout_im) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& inout_re, + sycl::buffer, 1>& inout_im) { const std::string func_name = "compute_forward(desc, inout_re, inout_im)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -111,7 +111,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, "rocFFT requires input and output offsets (first value in strides) to be equal for in-place transforms!"); } - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto inout_re_acc = inout_re.template get_access(cgh); auto inout_im_acc = inout_im.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -119,23 +119,23 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_re_acc)) + - offsets[0]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, inout_im_acc)) + - offsets[0]) + std::array inout_native{ + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_re_acc)) + + 
offsets[0]), + reinterpret_cast(reinterpret_cast*>( + detail::native_mem(ih, inout_im_acc)) + + offsets[0]) }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); } //Out-of-place transform template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer, 1> &in, - sycl::buffer, 1> &out) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, sycl::buffer, 1>& in, + sycl::buffer, 1>& out) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -144,7 +144,7 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer(cgh); auto out_acc = out.template get_access(cgh); commit->add_buffer_workspace_dependency_if_rqd("compute_forward", cgh); @@ -153,31 +153,31 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, sycl::buffer( - reinterpret_cast *>(detail::native_mem(ih, in_acc)) + + auto in_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_acc)) + offsets[0]); - auto out_native = reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, out_acc)) + + auto out_native = reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_acc)) + offsets[1]); - detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); + detail::execute_checked(func_name, stream, plan, &in_native, &out_native, info); }); }); } //Out-of-place transform, using config_param::COMPLEX_STORAGE=config_value::REAL_REAL data format template -ONEMKL_EXPORT void compute_forward(descriptor_type &desc, - sycl::buffer, 1> &in_re, - sycl::buffer, 1> &in_im, - sycl::buffer, 1> &out_re, - sycl::buffer, 1> &out_im) { +ONEMKL_EXPORT void compute_forward(descriptor_type& desc, + sycl::buffer, 1>& in_re, + sycl::buffer, 1>& in_im, + sycl::buffer, 1>& out_re, + sycl::buffer, 1>& out_im) { auto commit = 
detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_fwd_plan(commit); auto info = detail::get_fwd_info(commit); auto offsets = detail::get_offsets_fwd(commit); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto in_re_acc = in_re.template get_access(cgh); auto in_im_acc = in_im.template get_access(cgh); auto out_re_acc = out_re.template get_access(cgh); @@ -188,23 +188,24 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, const std::string func_name = "compute_forward(desc, in_re, in_im, out_re, out_im)"; auto stream = detail::setup_stream(func_name, ih, info); - std::array in_native{ - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_re_acc)) + + std::array in_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_re_acc)) + offsets[0]), - reinterpret_cast( - reinterpret_cast *>(detail::native_mem(ih, in_im_acc)) + + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, in_im_acc)) + offsets[0]) }; - std::array out_native{ - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_re_acc)) + - offsets[1]), - reinterpret_cast(reinterpret_cast *>( - detail::native_mem(ih, out_im_acc)) + - offsets[1]) + std::array out_native{ + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_re_acc)) + + offsets[1]), + reinterpret_cast( + reinterpret_cast*>(detail::native_mem(ih, out_im_acc)) + + offsets[1]) }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); } @@ -213,8 +214,8 @@ ONEMKL_EXPORT void compute_forward(descriptor_type &desc, //In-place transform template -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd *inout, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* inout, + const std::vector& deps) { 
const std::string func_name = "compute_forward(desc, inout, deps)"; detail::expect_config( desc, "Unexpected value for placement"); @@ -234,15 +235,15 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - void *inout_ptr = inout; - detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); + void* inout_ptr = inout; + detail::execute_checked(func_name, stream, plan, &inout_ptr, nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -251,9 +252,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *inout_re, - scalar *inout_im, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* inout_re, + scalar* inout_im, + const std::vector& deps) { const std::string func_name = "compute_forward(desc, inout_re, inout_im, deps)"; auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); @@ -267,14 +268,14 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalardepend_on_last_usm_workspace_event_if_rqd(cgh); dft::detail::fft_enqueue_task(cgh, [=](sycl::interop_handle ih) { auto stream = detail::setup_stream(func_name, ih, info); - std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; - detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); + std::array inout_native{ inout_re + offsets[0], inout_im + offsets[0] }; + detail::execute_checked(func_name, stream, plan, inout_native.data(), nullptr, info); }); }); commit->set_last_usm_workspace_event_if_rqd(sycl_event); @@ -283,9 +284,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type 
&desc, fwd *in, - bwd *out, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, fwd* in, + bwd* out, + const std::vector& deps) { detail::expect_config( desc, "Unexpected value for placement"); auto commit = detail::checked_get_commit(desc); @@ -297,7 +298,7 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwddepend_on_last_usm_workspace_event_if_rqd(cgh); @@ -305,9 +306,9 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwdset_last_usm_workspace_event_if_rqd(sycl_event); @@ -316,18 +317,18 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, fwd -ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar *in_re, - scalar *in_im, - scalar *out_re, - scalar *out_im, - const std::vector &deps) { +ONEMKL_EXPORT sycl::event compute_forward(descriptor_type& desc, scalar* in_re, + scalar* in_im, + scalar* out_re, + scalar* out_im, + const std::vector& deps) { auto commit = detail::checked_get_commit(desc); auto queue = commit->get_queue(); auto plan = detail::get_fwd_plan(commit); auto info = detail::get_fwd_info(commit); auto offsets = detail::get_offsets_fwd(commit); - sycl::event sycl_event = queue.submit([&](sycl::handler &cgh) { + sycl::event sycl_event = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(deps); commit->depend_on_last_usm_workspace_event_if_rqd(cgh); @@ -336,9 +337,10 @@ ONEMKL_EXPORT sycl::event compute_forward(descriptor_type &desc, scalar in_native{ in_re + offsets[0], in_im + offsets[0] }; - std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; - detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), info); + std::array in_native{ in_re + offsets[0], in_im + offsets[0] }; + std::array out_native{ out_re + offsets[1], out_im + offsets[1] }; + detail::execute_checked(func_name, stream, plan, in_native.data(), out_native.data(), + info); }); }); 
commit->set_last_usm_workspace_event_if_rqd(sycl_event); diff --git a/src/dft/execute_helper_generic.hpp b/src/dft/execute_helper_generic.hpp index 22fe0cb33..a7939a653 100644 --- a/src/dft/execute_helper_generic.hpp +++ b/src/dft/execute_helper_generic.hpp @@ -40,9 +40,9 @@ namespace oneapi::mkl::dft::detail { template static inline void fft_enqueue_task(HandlerT&& cgh, FnT&& f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else - cgh.host_task([=](sycl::interop_handle ih){ + cgh.host_task([=](sycl::interop_handle ih) { #endif f(std::move(ih)); }); diff --git a/src/include/allocator_helper.hpp b/src/include/allocator_helper.hpp index 8ea802dd1..2678dc114 100644 --- a/src/include/allocator_helper.hpp +++ b/src/include/allocator_helper.hpp @@ -29,7 +29,7 @@ namespace oneapi { namespace mkl { -static inline void *aligned_alloc(size_t align, size_t size) { +static inline void* aligned_alloc(size_t align, size_t size) { #ifdef _WIN64 return ::_aligned_malloc(size, align); #else @@ -37,7 +37,7 @@ static inline void *aligned_alloc(size_t align, size_t size) { #endif } -static inline void aligned_free(void *p) { +static inline void aligned_free(void* p) { #ifdef _WIN64 ::_aligned_free(p); #else diff --git a/src/include/function_table_initializer.hpp b/src/include/function_table_initializer.hpp index 8a870e218..0c0e040c0 100644 --- a/src/include/function_table_initializer.hpp +++ b/src/include/function_table_initializer.hpp @@ -30,7 +30,7 @@ #ifdef __linux__ #include -#define LIB_TYPE void * +#define LIB_TYPE void* #define GET_LIB_HANDLE(libname) dlopen((libname), RTLD_LAZY | RTLD_GLOBAL) #define GET_FUNC(lib, fn) dlsym(lib, (fn)) #define FREE_LIB_HANDLE(libname) dlclose(libname) @@ -59,7 +59,7 @@ class table_initializer { using dlhandle = std::unique_ptr; public: - function_table_t &operator[](std::pair device_queue_pair) { + 
function_table_t& operator[](std::pair device_queue_pair) { auto lib = tables.find(device_queue_pair.first); if (lib != tables.end()) return lib->second; @@ -96,10 +96,10 @@ class table_initializer { } #endif - function_table_t &add_table(oneapi::mkl::device key, sycl::queue &q) { + function_table_t& add_table(oneapi::mkl::device key, sycl::queue& q) { dlhandle handle; // check all available libraries for the key(device) - for (const char *libname : libraries[domain_id][key]) { + for (const char* libname : libraries[domain_id][key]) { handle = dlhandle{ ::GET_LIB_HANDLE(libname) }; if (handle) break; @@ -114,7 +114,7 @@ class table_initializer { } } auto t = - reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); + reinterpret_cast(::GET_FUNC(handle.get(), table_names[domain_id])); if (!t) { std::cerr << ERROR_MSG << '\n'; diff --git a/src/lapack/backends/cusolver/cusolver_batch.cpp b/src/lapack/backends/cusolver/cusolver_batch.cpp index f4017f873..f5e2a6e5f 100644 --- a/src/lapack/backends/cusolver/cusolver_batch.cpp +++ b/src/lapack/backends/cusolver/cusolver_batch.cpp @@ -31,24 +31,24 @@ namespace cusolver { // BATCH BUFFER API template -inline void geqrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto 
a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -62,10 +62,10 @@ inline void geqrf_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GEQRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, \ + void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, \ std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -78,10 +78,10 @@ GEQRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgeqrf) #undef GEQRF_STRIDED_BATCH_LAUNCHER template -inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void getri_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t 
stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -91,7 +91,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::buffer ipiv32(sycl::range<1>{ ipiv32_size }); sycl::buffer devInfo{ batch_size }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv_acc = sycl::accessor{ ipiv, cgh, sycl::read_only }; auto ipiv32_acc = sycl::accessor{ ipiv32, cgh, sycl::write_only }; cgh.parallel_for(sycl::range<1>{ ipiv32_size }, [=](sycl::id<1> index) { @@ -102,7 +102,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st // getri_batched is contained within cublas, not cusolver. For this reason // we need to use cublas types instead of cusolver types (as is needed for // other lapack routines) - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { using blas::cublas::cublas_error; sycl::accessor a_acc{ a, cgh, sycl::read_only }; @@ -110,7 +110,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::accessor ipiv32_acc{ ipiv32, cgh }; sycl::accessor devInfo_acc{ devInfo, cgh, sycl::write_only }; - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { cublasStatus_t err; CUresult cuda_result; cublasHandle_t cublas_handle; @@ -118,27 +118,28 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st CUstream cu_stream = sycl::get_native(queue); CUBLAS_ERROR_FUNC(cublasSetStream, err, cublas_handle, cu_stream); - auto a_ = sc.get_mem(a_acc); - auto scratch_ = sc.get_mem(scratch_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto info_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto scratch_ = sc.get_mem(scratch_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto 
info_ = sc.get_mem(devInfo_acc); CUdeviceptr a_dev; - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + auto** a_dev_ = reinterpret_cast(a_dev); CUdeviceptr scratch_dev; - cuDataType **scratch_batched = + cuDataType** scratch_batched = create_ptr_list_from_stride(scratch_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T*) * batch_size); CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, scratch_dev, scratch_batched, - sizeof(T *) * batch_size); - auto **scratch_dev_ = reinterpret_cast(scratch_dev); + sizeof(T*) * batch_size); + auto** scratch_dev_ = reinterpret_cast(scratch_dev); - blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, lda, ipiv32_, - scratch_dev_, lda, info_, batch_size); + blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, + lda, ipiv32_, scratch_dev_, lda, info_, + batch_size); free(a_batched); free(scratch_batched); @@ -148,7 +149,7 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st }); // The inverted matrices stored in scratch_ need to be stored in a_ - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { sycl::accessor a_acc{ a, cgh, sycl::write_only }; sycl::accessor scratch_acc{ scratchpad, cgh, sycl::read_only }; cgh.parallel_for(sycl::range<1>{ static_cast( @@ -156,7 +157,7 @@ inline void getri_batch(const char 
*func_name, Func func, sycl::queue &queue, st [=](sycl::id<1> index) { a_acc[index] = scratch_acc[index]; }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { sycl::accessor ipiv32_acc{ ipiv32, cgh, sycl::read_only }; sycl::accessor ipiv_acc{ ipiv, cgh, sycl::write_only }; cgh.parallel_for(sycl::range<1>{ static_cast(ipiv32_size) }, @@ -168,10 +169,10 @@ inline void getri_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GETRI_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &ipiv, \ + void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& ipiv, \ std::int64_t stride_ipiv, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getri_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, n, a, lda, stride_a, ipiv, \ stride_ipiv, batch_size, scratchpad, scratchpad_size); \ } @@ -184,12 +185,12 @@ GETRI_STRIDED_BATCH_LAUNCHER(std::complex, cublasZgetriBatched) #undef GETRI_STRIDED_BATCH_LAUNCHER template -inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline void getrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = 
typename CudaEquivalentType::Type; @@ -201,7 +202,7 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = stride_ipiv * batch_size; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -209,16 +210,16 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end @@ -235,12 +236,12 @@ inline void getrs_batch(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &ipiv, \ - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, \ + void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& ipiv, \ + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, \ std::int64_t stride_b, 
std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, \ scratchpad_size); \ @@ -254,10 +255,10 @@ GETRS_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgetrs) #undef GETRS_STRIDED_BATCH_LAUNCHER template -inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void getrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -270,17 +271,17 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ batch_size }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv32_acc); + auto 
devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -292,7 +293,7 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st }); // Copy from 32-bit USM to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, @@ -303,10 +304,10 @@ inline void getrf_batch(const char *func_name, Func func, sycl::queue &queue, st } #define GETRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, \ + void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, \ std::int64_t stride_ipiv, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); \ } @@ -319,25 +320,25 @@ GETRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZgetrf) #undef GETRF_STRIDED_BATCH_LAUNCHER template -inline void orgqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void orgqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -351,10 +352,10 @@ inline void orgqr_batch(const char *func_name, Func func, sycl::queue &queue, st } #define ORGQR_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -365,30 +366,30 @@ ORGQR_STRIDED_BATCH_LAUNCHER(double, cusolverDnDorgqr) #undef ORGQR_STRIDED_BATCH_LAUNCHER template -inline void potrf_batch(const char *func_name, Func func, sycl::queue &queue, 
- oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, +inline void potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev; CUresult cuda_result; cusolverStatus_t err; - auto a_ = sc.get_mem(a_acc); + auto a_ = sc.get_mem(a_acc); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto** a_dev_ = reinterpret_cast(a_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, a_dev_, (int)lda, nullptr, (int)batch_size); @@ -401,9 +402,9 @@ inline void potrf_batch(const char *func_name, Func func, sycl::queue &queue, // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRF_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrf_batch(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t batch_size, sycl::buffer &scratchpad, \ + void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t batch_size, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, stride_a, \ batch_size, scratchpad, scratchpad_size); \ @@ -417,11 +418,11 @@ POTRF_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZpotrfBatched) #undef POTRF_STRIDED_BATCH_LAUNCHER template -inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline void potrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; @@ -431,28 +432,28 @@ inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, if (nrhs != 1) throw unimplemented("lapack", "potrs_batch", "cusolver potrs_batch only supports nrhs = 1"); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev, b_dev; cusolverStatus_t err; CUresult cuda_result; - auto a_ = sc.get_mem(a_acc); - auto b_ = 
sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - cuDataType **b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + cuDataType** b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); - auto **b_dev_ = reinterpret_cast(b_dev); + auto** a_dev_ = reinterpret_cast(a_dev); + auto** b_dev_ = reinterpret_cast(b_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, (int)nrhs, a_dev_, (int)lda, b_dev_, ldb, nullptr, @@ -468,11 +469,11 @@ inline void potrs_batch(const char *func_name, Func func, sycl::queue &queue, // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, \ + void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& 
a, std::int64_t lda, \ + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, \ std::int64_t stride_b, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, \ stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); \ } @@ -485,25 +486,25 @@ POTRS_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZpotrsBatched) #undef POTRS_STRIDED_BATCH_LAUNCHER template -inline void ungqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, +inline void ungqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // Uses scratch so sync between each 
cuSolver call @@ -517,10 +518,10 @@ inline void ungqr_batch(const char *func_name, Func func, sycl::queue &queue, st } #define UNGQR_STRIDED_BATCH_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, \ - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, \ + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size); \ } @@ -533,22 +534,22 @@ UNGQR_STRIDED_BATCH_LAUNCHER(std::complex, cusolverDnZungqr) // BATCH USM API template -inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, - T *tau, std::int64_t stride_tau, std::int64_t batch_size, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t stride_a, + T* tau, std::int64_t stride_tau, std::int64_t batch_size, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + 
onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -564,11 +565,11 @@ inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GEQRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -582,24 +583,24 @@ GEQRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, T **tau, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event geqrf_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, T** a, std::int64_t* lda, T** tau, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { 
using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -620,9 +621,9 @@ inline sycl::event geqrf_batch(const char *func_name, Func func, sycl::queue &qu #define GEQRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event geqrf_batch( \ - sycl::queue &queue, std::int64_t *m, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - TYPE **tau, std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, std::int64_t* m, std::int64_t* n, TYPE** a, std::int64_t* lda, \ + TYPE** tau, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return geqrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -635,11 +636,11 @@ GEQRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_BATCH_LAUNCHER_USM template -inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, 
T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, stride_a, stride_ipiv, batch_size, scratchpad_size); @@ -648,17 +649,17 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = stride_ipiv * batch_size; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int) * batch_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); + int* devInfo = (int*)malloc_device(sizeof(int) * batch_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratchpad_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratchpad_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -670,14 +671,14 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu }); // Copy from 32-bit USM to 64-bit - sycl::event done_casting = 
queue.submit([&](sycl::handler &cgh) { + sycl::event done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = ipiv32[index]; }); }); // Enqueue free memory, don't return event as not-neccessary for user to wait for ipiv32 being released - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done_casting); cgh.host_task([=](sycl::interop_handle ih) { sycl::free(ipiv32, queue); }); }); @@ -690,11 +691,11 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, \ - std::int64_t stride_ipiv, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, \ + std::int64_t stride_ipiv, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, stride_a, \ ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -708,12 +709,12 @@ GETRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event getrf_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t 
group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -726,19 +727,19 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu // cuSolver legacy api does not accept 64-bit ints. // To get around the limitation. // Allocate memory with 32-bit ints then copy over results - int **ipiv32 = (int **)malloc(sizeof(int *) * batch_size); + int** ipiv32 = (int**)malloc(sizeof(int*) * batch_size); int64_t global_id = 0; for (int64_t group_id = 0; group_id < group_count; ++group_id) for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) - ipiv32[global_id] = (int *)malloc_device(sizeof(int) * n[group_id], queue); - int *devInfo = (int *)malloc_device(sizeof(int) * batch_size, queue); + ipiv32[global_id] = (int*)malloc_device(sizeof(int) * n[group_id], queue); + int* devInfo = (int*)malloc_device(sizeof(int) * batch_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -759,10 +760,10 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu for (int64_t group_id = 0, global_id = 0; group_id < group_count; ++group_id) { uint64_t ipiv_size = n[group_id]; for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) { - int64_t *d_ipiv = ipiv[global_id]; - int *d_ipiv32 = ipiv32[global_id]; + 
int64_t* d_ipiv = ipiv[global_id]; + int* d_ipiv32 = ipiv32[global_id]; - sycl::event e = queue.submit([&](sycl::handler &cgh) { + sycl::event e = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { d_ipiv[index] = d_ipiv32[index]; }); @@ -772,7 +773,7 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } // Enqueue free memory - sycl::event done_freeing = queue.submit([&](sycl::handler &cgh) { + sycl::event done_freeing = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(casting_dependencies); cgh.host_task([=](sycl::interop_handle ih) { for (int64_t global_id = 0; global_id < batch_size; ++global_id) @@ -789,11 +790,11 @@ inline sycl::event getrf_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, TYPE **a, \ - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, TYPE** a, \ + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -806,20 +807,20 @@ GETRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRS_BATCH_LAUNCHER_USM template -sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t n, T *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, T *scratchpad, +sycl::event getri_batch(const char* func_name, Func func, 
sycl::queue& queue, std::int64_t n, T* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, stride_ipiv, batch_size, scratchpad_size); std::uint64_t ipiv32_size = n * batch_size; - int *ipiv32 = sycl::malloc_device(ipiv32_size, queue); - int *devInfo = sycl::malloc_device(batch_size, queue); + int* ipiv32 = sycl::malloc_device(ipiv32_size, queue); + int* devInfo = sycl::malloc_device(batch_size, queue); - sycl::event done_casting = queue.submit([&](sycl::handler &cgh) { + sycl::event done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for( sycl::range<1>{ static_cast(ipiv32_size) }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[(index / n) * stride_ipiv + index % n]); @@ -829,13 +830,13 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st // getri_batched is contained within cublas, not cusolver. 
For this reason // we need to use cublas types instead of cusolver types (as is needed for // other lapack routines) - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { using blas::cublas::cublas_error; cgh.depends_on(done_casting); cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { cublasStatus_t err; CUresult cuda_result; cublasHandle_t cublas_handle; @@ -844,23 +845,24 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st CUBLAS_ERROR_FUNC(cublasSetStream, err, cublas_handle, cu_stream); CUdeviceptr a_dev; - auto *a_ = reinterpret_cast(a); - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto* a_ = reinterpret_cast(a); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + auto** a_dev_ = reinterpret_cast(a_dev); CUdeviceptr scratch_dev; - auto *scratch_ = reinterpret_cast(scratchpad); - cuDataType **scratch_batched = + auto* scratch_ = reinterpret_cast(scratchpad); + cuDataType** scratch_batched = create_ptr_list_from_stride(scratch_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &scratch_dev, sizeof(T*) * batch_size); CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, scratch_dev, scratch_batched, - sizeof(T *) * batch_size); - auto **scratch_dev_ = reinterpret_cast(scratch_dev); + sizeof(T*) * batch_size); + 
auto** scratch_dev_ = reinterpret_cast(scratch_dev); - blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, lda, ipiv32, - scratch_dev_, lda, devInfo, batch_size); + blas::cublas::cublas_native_named_func(func_name, func, err, cublas_handle, n, a_dev_, + lda, ipiv32, scratch_dev_, lda, devInfo, + batch_size); free(a_batched); free(scratch_batched); @@ -870,14 +872,14 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st }); // The inverted matrices stored in scratch_ need to be stored in a_ - auto copy1 = queue.submit([&](sycl::handler &cgh) { + auto copy1 = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for( sycl::range<1>{ static_cast(stride_a * (batch_size - 1) + lda * n) }, [=](sycl::id<1> index) { a[index] = scratchpad[index]; }); }); - auto copy2 = queue.submit([&](sycl::handler &cgh) { + auto copy2 = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for( sycl::range<1>{ static_cast(ipiv32_size) }, [=](sycl::id<1> index) { @@ -893,9 +895,9 @@ sycl::event getri_batch(const char *func_name, Func func, sycl::queue &queue, st #define GETRI_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event getri_batch( \ - sycl::queue &queue, std::int64_t n, TYPE *a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, std::int64_t n, TYPE* a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return getri_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, n, a, lda, stride_a, ipiv, \ stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); \ } @@ -907,41 +909,41 @@ GETRI_BATCH_LAUNCHER_USM(std::complex, cublasZgetriBatched) 
#undef GETRI_BATCH_LAUNCHER_USM -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + 
std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } template -inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - T *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, T *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, + T* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, T* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, stride_ipiv, stride_b, batch_size, scratchpad_size); @@ -950,22 +952,22 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu // To get around the limitation. // Create new memory and convert 64-bit values. 
std::uint64_t ipiv_size = stride_ipiv * batch_size; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(done_casting); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end @@ -986,12 +988,12 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu } #define GETRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t stride_a, \ - std::int64_t *ipiv, std::int64_t stride_ipiv, TYPE *b, \ + sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t stride_a, \ + std::int64_t* ipiv, std::int64_t stride_ipiv, TYPE* b, \ std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return 
getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, \ scratchpad_size, dependencies); \ @@ -1005,13 +1007,13 @@ GETRS_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - T **a, std::int64_t *lda, std::int64_t **ipiv, T **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event getrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + T** a, std::int64_t* lda, std::int64_t** ipiv, T** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1026,17 +1028,17 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu // an array of 64-bit ints in device memory. Each vec of ipiv // values need to be converted from 64-bit to 32-bit. The list // must stay on host. 
- int **ipiv32 = (int **)malloc(sizeof(int *) * batch_size); + int** ipiv32 = (int**)malloc(sizeof(int*) * batch_size); std::vector casting_dependencies(batch_size); int64_t global_id = 0; for (int64_t group_id = 0; group_id < group_count; ++group_id) { for (int64_t local_id = 0; local_id < group_sizes[group_id]; ++local_id, ++global_id) { uint64_t ipiv_size = n[group_id]; - int *d_group_ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* d_group_ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); ipiv32[global_id] = d_group_ipiv32; - int64_t *d_group_ipiv = ipiv[global_id]; + int64_t* d_group_ipiv = ipiv[global_id]; - auto e = queue.submit([&](sycl::handler &cgh) { + auto e = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { d_group_ipiv32[index] = static_cast(d_group_ipiv[index]); }); @@ -1045,14 +1047,14 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu } } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(casting_dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; int64_t global_id = 0; @@ -1081,10 +1083,10 @@ inline sycl::event getrs_batch(const char *func_name, Func func, sycl::queue &qu #define GETRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event getrs_batch( \ - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, std::int64_t **ipiv, TYPE **b, std::int64_t *ldb, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t 
scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, std::int64_t** ipiv, TYPE** b, std::int64_t* ldb, \ + std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return getrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, \ ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ dependencies); \ @@ -1098,22 +1100,22 @@ GETRS_BATCH_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_BATCH_LAUNCHER_USM template -inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, - std::int64_t stride_a, T *tau, std::int64_t stride_tau, - std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, + std::int64_t stride_a, T* tau, std::int64_t stride_tau, + std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = 
reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -1129,11 +1131,11 @@ inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &qu } #define ORGQR_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - TYPE *a, std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + TYPE* a, std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1145,25 +1147,25 @@ ORGQR_STRIDED_BATCH_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, - std::int64_t *lda, T **tau, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event orgqr_batch(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, T** a, + std::int64_t* lda, T** tau, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], k[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler 
&cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -1184,11 +1186,11 @@ inline sycl::event orgqr_batch(const char *func_name, Func func, sycl::queue &qu } #define ORGQR_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, \ - TYPE **a, std::int64_t *lda, TYPE **tau, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, \ + TYPE** a, std::int64_t* lda, TYPE** tau, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return orgqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -1199,31 +1201,31 @@ ORGQR_BATCH_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_BATCH_LAUNCHER_USM template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t batch_size, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, T* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, stride_a, batch_size, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUdeviceptr a_dev; cusolverStatus_t err; CUresult cuda_result; - auto *a_ = reinterpret_cast(a); + auto* a_ = reinterpret_cast(a); // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + auto** a_dev_ = reinterpret_cast(a_dev); cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, a_dev_, (int)lda, nullptr, (int)batch_size); @@ -1237,10 +1239,10 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRF_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ + sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, \ - TYPE *scratchpad, std::int64_t 
scratchpad_size, \ - const std::vector &dependencies) { \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, stride_a, \ batch_size, scratchpad, scratchpad_size, dependencies); \ } @@ -1253,11 +1255,11 @@ POTRF_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrfBatched) #undef POTRF_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1266,19 +1268,19 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu batch_size += group_sizes[i]; } - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; CUdeviceptr a_dev; CUresult cuda_result; cusolverStatus_t err; - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a, sizeof(T *) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); + 
auto** a_dev_ = reinterpret_cast(a_dev); // Does not use scratch so call cuSolver asynchronously and sync at end for (int64_t i = 0; i < group_count; i++) { @@ -1300,9 +1302,9 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRF_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event potrf_batch( \ - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, TYPE** a, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return potrf_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -1315,12 +1317,12 @@ POTRF_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrfBatched) #undef POTRF_BATCH_LAUNCHER_USM template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t stride_a, T *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t stride_a, T* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, stride_a, stride_b, batch_size, 
scratchpad_size); @@ -1329,26 +1331,26 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu if (nrhs != 1) throw unimplemented("lapack", "potrs_batch", "cusolver potrs_batch only supports nrhs = 1"); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); CUresult cuda_result; CUdeviceptr a_dev, b_dev; - auto *a_ = reinterpret_cast(a); - auto *b_ = reinterpret_cast(b); + auto* a_ = reinterpret_cast(a); + auto* b_ = reinterpret_cast(b); cusolverStatus_t err; // Transform ptr and stride to list of ptr's - cuDataType **a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); - cuDataType **b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T *) * batch_size); - CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T *) * batch_size); + cuDataType** a_batched = create_ptr_list_from_stride(a_, stride_a, batch_size); + cuDataType** b_batched = create_ptr_list_from_stride(b_, stride_b, batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &a_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemAlloc, cuda_result, &b_dev, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, a_dev, a_batched, sizeof(T*) * batch_size); + CUDA_ERROR_FUNC(cuMemcpyHtoD, cuda_result, b_dev, b_batched, sizeof(T*) * batch_size); - auto **a_dev_ = reinterpret_cast(a_dev); - auto **b_dev_ = reinterpret_cast(b_dev); + auto** a_dev_ = reinterpret_cast(a_dev); + auto** b_dev_ = reinterpret_cast(b_dev); 
cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), (int)n, (int)nrhs, a_dev_, (int)lda, b_dev_, ldb, nullptr, @@ -1365,10 +1367,10 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE *a, \ - std::int64_t lda, std::int64_t stride_a, TYPE *b, std::int64_t ldb, std::int64_t stride_b, \ - std::int64_t batch_size, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, TYPE* a, \ + std::int64_t lda, std::int64_t stride_a, TYPE* b, std::int64_t ldb, std::int64_t stride_b, \ + std::int64_t batch_size, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, \ stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1382,12 +1384,12 @@ POTRS_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrsBatched) #undef POTRS_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, T **a, - std::int64_t *lda, T **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a, + std::int64_t* lda, T** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& 
dependencies) { using cuDataType = typename CudaEquivalentType::Type; int64_t batch_size = 0; @@ -1401,29 +1403,29 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu "cusolver potrs_batch only supports nrhs = 1"); } - int *info = (int *)malloc_device(sizeof(int *) * batch_size, queue); - T **a_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); - T **b_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); + int* info = (int*)malloc_device(sizeof(int*) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); + T** b_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy_a = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); auto done_cpy_b = - queue.submit([&](sycl::handler &h) { h.memcpy(b_dev, b, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(b_dev, b, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.depends_on(done_cpy_a); cgh.depends_on(done_cpy_b); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; cusolverStatus_t err; // Does not use scratch so call cuSolver asynchronously and sync at end for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto **b_ = reinterpret_cast(b_dev); - auto info_ = reinterpret_cast(info); + auto** a_ = reinterpret_cast(a_dev); + auto** b_ = reinterpret_cast(b_dev); + auto info_ = reinterpret_cast(info); CUSOLVER_ERROR_FUNC_T(func_name, func, err, handle, get_cublas_fill_mode(uplo[i]), (int)n[i], (int)nrhs[i], a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], info_, 
(int)group_sizes[i]); @@ -1440,10 +1442,10 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, TYPE **b, std::int64_t *ldb, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, \ ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ dependencies); \ @@ -1457,22 +1459,22 @@ POTRS_BATCH_LAUNCHER_USM(std::complex, cusolverDnZpotrsBatched) #undef POTRS_BATCH_LAUNCHER_USM template -inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, - std::int64_t stride_a, T *tau, std::int64_t stride_tau, - std::int64_t batch_size, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungqr_batch(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, + std::int64_t stride_a, T* tau, std::int64_t stride_tau, + std::int64_t batch_size, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, stride_a, stride_tau, batch_size, scratchpad_size); - auto done = 
queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // Uses scratch so sync between each cuSolver call @@ -1488,11 +1490,11 @@ inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &qu } #define UNGQR_STRIDED_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - TYPE *a, std::int64_t lda, std::int64_t stride_a, TYPE *tau, \ - std::int64_t stride_tau, std::int64_t batch_size, TYPE *scratchpad, \ + sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + TYPE* a, std::int64_t lda, std::int64_t stride_a, TYPE* tau, \ + std::int64_t stride_tau, std::int64_t batch_size, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, stride_a, \ tau, stride_tau, batch_size, scratchpad, scratchpad_size, \ dependencies); \ @@ -1504,25 +1506,25 @@ UNGQR_STRIDED_BATCH_LAUNCHER_USM(std::complex, cusolverDnZungqr) #undef UNGQR_STRIDED_BATCH_LAUNCHER_USM template -inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, T **a, - std::int64_t *lda, T **tau, std::int64_t group_count, - std::int64_t *group_sizes, T *scratchpad, +inline sycl::event ungqr_batch(const char* func_name, Func func, 
sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, T** a, + std::int64_t* lda, T** tau, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(group_count, scratchpad_size); for (int64_t i = 0; i < group_count; ++i) overflow_check(m[i], n[i], k[i], lda[i], group_sizes[i]); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); int64_t global_id = 0; cusolverStatus_t err; @@ -1543,11 +1545,11 @@ inline sycl::event ungqr_batch(const char *func_name, Func func, sycl::queue &qu } #define UNGQR_BATCH_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, \ - TYPE **a, std::int64_t *lda, TYPE **tau, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, \ + sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, \ + TYPE** a, std::int64_t* lda, TYPE** tau, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ungqr_batch(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -1560,12 +1562,12 @@ 
UNGQR_BATCH_LAUNCHER_USM(std::complex, cusolverDnZungqr) // BATCH SCRATCHPAD API template -inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, - std::int64_t batch_size, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + std::int64_t batch_size, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1628,12 +1630,12 @@ GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #undef GETRS_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, - std::int64_t batch_size, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + std::int64_t batch_size, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1695,13 +1697,13 @@ POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex) #undef POTRS_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, 
std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1729,13 +1731,13 @@ ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); cusolverStatus_t err; @@ -1763,12 +1765,12 @@ ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH(std::complex, cusolverDnZungqr_buff #undef ORGQR_STRIDED_BATCH_LAUNCHER_SCRATCH template -inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void getrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* 
group_sizes, + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1789,8 +1791,8 @@ inline void getrf_batch_scratchpad_size(const char *func_name, Func func, sycl:: #define GETRF_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t getrf_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ int scratch_size; \ getrf_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, lda, \ group_count, group_sizes, &scratch_size); \ @@ -1804,18 +1806,18 @@ GETRF_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnZgetrf_bufferSize) #undef GETRF_GROUP_LAUNCHER_SCRATCH -#define GETRI_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t getri_batch_scratchpad_size(sycl::queue & queue, std::int64_t * n, \ - std::int64_t * lda, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - std::int64_t max_scratch_sz = 0; \ - for (auto group_id = 0; group_id < group_count; ++group_id) { \ - auto scratch_sz = lda[group_id] * n[group_id]; \ - if (scratch_sz > max_scratch_sz) \ - max_scratch_sz = scratch_sz; \ - } \ - return max_scratch_sz; \ +#define GETRI_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t getri_batch_scratchpad_size(sycl::queue & queue, std::int64_t* n, \ + std::int64_t* lda, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + std::int64_t max_scratch_sz = 0; \ + for (auto group_id = 0; group_id < group_count; ++group_id) { \ + auto scratch_sz = lda[group_id] * n[group_id]; \ + if (scratch_sz > max_scratch_sz) \ + max_scratch_sz = scratch_sz; \ + } \ + return 
max_scratch_sz; \ } GETRI_GROUP_LAUNCHER_SCRATCH(float) @@ -1825,13 +1827,13 @@ GETRI_GROUP_LAUNCHER_SCRATCH(std::complex) #undef GETRI_GROUP_LAUNCHER_SCRATCH -#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t getrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::mkl::transpose * trans, std::int64_t * n, \ - std::int64_t * nrhs, std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define GETRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t getrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::mkl::transpose * trans, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } GETRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1842,12 +1844,12 @@ GETRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef GETRS_GROUP_LAUNCHER_SCRATCH template -inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void geqrf_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1868,8 +1870,8 @@ inline void geqrf_batch_scratchpad_size(const char *func_name, Func func, sycl:: #define GEQRF_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ template <> \ std::int64_t geqrf_batch_scratchpad_size( \ - sycl::queue & queue, 
std::int64_t * m, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ int scratch_size; \ geqrf_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, lda, \ group_count, group_sizes, &scratch_size); \ @@ -1884,12 +1886,12 @@ GEQRF_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnZgeqrf_bufferSize) #undef GEQRF_GROUP_LAUNCHER_SCRATCH template -inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void orgqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1908,15 +1910,15 @@ inline void orgqr_batch_scratchpad_size(const char *func_name, Func func, sycl:: e.wait(); } -#define ORGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t orgqr_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * k, \ - std::int64_t * lda, std::int64_t group_count, std::int64_t * group_sizes) { \ - int scratch_size; \ - orgqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ - group_count, group_sizes, &scratch_size); \ - return scratch_size; \ +#define 
ORGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t orgqr_batch_scratchpad_size( \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + int scratch_size; \ + orgqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ + group_count, group_sizes, &scratch_size); \ + return scratch_size; \ } ORGQR_GROUP_LAUNCHER_SCRATCH(float, cusolverDnSorgqr_bufferSize) @@ -1925,12 +1927,12 @@ ORGQR_GROUP_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_GROUP_LAUNCHER_SCRATCH // cusolverDnXpotrfBatched does not use scratchpad memory -#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrf_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ - return 0; \ +#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + return 0; \ } POTRF_GROUP_LAUNCHER_SCRATCH(float) @@ -1941,13 +1943,13 @@ POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRF_GROUP_LAUNCHER_SCRATCH // cusolverDnXpotrsBatched does not use scratchpad memory -#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t * n, std::int64_t * nrhs, \ - std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, 
std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } POTRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1958,12 +1960,12 @@ POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRS_GROUP_LAUNCHER_SCRATCH template -inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, int *scratch_size) { - auto e = queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { +inline void ungqr_batch_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, int* scratch_size) { + auto e = queue.submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int group_scratch_size = 0; *scratch_size = 0; @@ -1982,15 +1984,15 @@ inline void ungqr_batch_scratchpad_size(const char *func_name, Func func, sycl:: e.wait(); } -#define UNGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ - template <> \ - std::int64_t ungqr_batch_scratchpad_size( \ - sycl::queue & queue, std::int64_t * m, std::int64_t * n, std::int64_t * k, \ - std::int64_t * lda, std::int64_t group_count, std::int64_t * group_sizes) { \ - int scratch_size; \ - ungqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, lda, \ - group_count, group_sizes, &scratch_size); \ - return scratch_size; \ +#define UNGQR_GROUP_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ + template <> \ + std::int64_t ungqr_batch_scratchpad_size( \ + sycl::queue & queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + int scratch_size; \ + ungqr_batch_scratchpad_size(#CUSOLVER_ROUTINE, 
CUSOLVER_ROUTINE, queue, m, n, k, lda, \ + group_count, group_sizes, &scratch_size); \ + return scratch_size; \ } UNGQR_GROUP_LAUNCHER_SCRATCH(std::complex, cusolverDnCungqr_bufferSize) diff --git a/src/lapack/backends/cusolver/cusolver_handle.hpp b/src/lapack/backends/cusolver/cusolver_handle.hpp index f3b587039..75d589b06 100644 --- a/src/lapack/backends/cusolver/cusolver_handle.hpp +++ b/src/lapack/backends/cusolver/cusolver_handle.hpp @@ -28,10 +28,10 @@ namespace cusolver { template struct cusolver_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t cusolver_handle_mapper_{}; ~cusolver_handle() noexcept(false) { - for (auto &handle_pair : cusolver_handle_mapper_) { + for (auto& handle_pair : cusolver_handle_mapper_) { cusolverStatus_t err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/lapack/backends/cusolver/cusolver_helper.hpp b/src/lapack/backends/cusolver/cusolver_helper.hpp index 954d41246..425993d45 100644 --- a/src/lapack/backends/cusolver/cusolver_helper.hpp +++ b/src/lapack/backends/cusolver/cusolver_helper.hpp @@ -82,7 +82,7 @@ void overflow_check(Index index, Next... 
indices) { class cusolver_error : virtual public std::runtime_error { protected: - inline const char *cusolver_error_map(cusolverStatus_t error) { + inline const char* cusolver_error_map(cusolverStatus_t error) { switch (error) { case CUSOLVER_STATUS_SUCCESS: return "CUSOLVER_STATUS_SUCCESS"; @@ -131,7 +131,7 @@ class cusolver_error : virtual public std::runtime_error { class cuda_error : virtual public std::runtime_error { protected: - inline const char *cuda_error_map(CUresult result) { + inline const char* cuda_error_map(CUresult result) { switch (result) { case CUDA_SUCCESS: return "CUDA_SUCCESS"; case CUDA_ERROR_NOT_PERMITTED: return "CUDA_ERROR_NOT_PERMITTED"; @@ -201,9 +201,8 @@ class cuda_error : virtual public std::runtime_error { CUSOLVER_SYNC(err, handle) template -inline void cusolver_native_named_func(const char *func_name, Func func, - cusolverStatus_t err, - cusolverDnHandle_t handle, Types... args){ +inline void cusolver_native_named_func(const char* func_name, Func func, cusolverStatus_t err, + cusolverDnHandle_t handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND CUSOLVER_ERROR_FUNC_T(func_name, func, err, handle, args...) 
#else @@ -291,25 +290,25 @@ struct CudaEquivalentType> { /* devinfo */ -inline void get_cusolver_devinfo(sycl::queue &queue, sycl::buffer &devInfo, - std::vector &dev_info_) { +inline void get_cusolver_devinfo(sycl::queue& queue, sycl::buffer& devInfo, + std::vector& dev_info_) { sycl::host_accessor dev_info_acc{ devInfo }; for (unsigned int i = 0; i < dev_info_.size(); ++i) dev_info_[i] = dev_info_acc[i]; } -inline void get_cusolver_devinfo(sycl::queue &queue, const int *devInfo, - std::vector &dev_info_) { +inline void get_cusolver_devinfo(sycl::queue& queue, const int* devInfo, + std::vector& dev_info_) { queue.wait(); queue.memcpy(dev_info_.data(), devInfo, sizeof(int)); } template -inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char *func_name, - const char *cufunc_name, int dev_info_size = 1) { +inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char* func_name, + const char* cufunc_name, int dev_info_size = 1) { std::vector dev_info_(dev_info_size); get_cusolver_devinfo(queue, devinfo, dev_info_); - for (const auto &val : dev_info_) { + for (const auto& val : dev_info_) { if (val > 0) throw oneapi::mkl::lapack::computation_error( func_name, std::string(cufunc_name) + " failed with info = " + std::to_string(val), @@ -322,8 +321,8 @@ inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char // Creates list of matrix/vector pointers from initial ptr and stride // Note: user is responsible for deallocating memory template -T **create_ptr_list_from_stride(T *ptr, int64_t ptr_stride, int64_t batch_size) { - T **ptr_list = (T **)malloc(sizeof(T *) * batch_size); +T** create_ptr_list_from_stride(T* ptr, int64_t ptr_stride, int64_t batch_size) { + T** ptr_list = (T**)malloc(sizeof(T*) * batch_size); for (int64_t i = 0; i < batch_size; i++) ptr_list[i] = ptr + i * ptr_stride; diff --git a/src/lapack/backends/cusolver/cusolver_lapack.cpp b/src/lapack/backends/cusolver/cusolver_lapack.cpp index 
c8190f50d..2e3176156 100644 --- a/src/lapack/backends/cusolver/cusolver_lapack.cpp +++ b/src/lapack/backends/cusolver/cusolver_lapack.cpp @@ -30,10 +30,10 @@ namespace cusolver { // BUFFER APIs template -inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -41,21 +41,21 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int if (m < n) throw unimplemented("lapack", "gebrd", "cusolver gebrd does not support m < n"); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tauq_acc = tauq.template get_access(cgh); auto taup_acc = taup.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tauq_ = sc.get_mem(tauq_acc); - auto taup_ = sc.get_mem(taup_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tauq_ = sc.get_mem(tauq_acc); 
+ auto taup_ = sc.get_mem(taup_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_, scratch_, scratchpad_size, nullptr); @@ -64,10 +64,10 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEBRD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tauq, sycl::buffer &taup, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tauq, sycl::buffer& taup, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ gebrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size); \ } @@ -79,43 +79,43 @@ GEBRD_LAUNCHER(std::complex, double, cusolverDnZgebrd) #undef GEBRD_LAUNCHER -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t 
scratchpad_size) { +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } template -inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -124,8 +124,8 @@ inline void geqrf(const char 
*func_name, Func func, sycl::queue &queue, std::int } #define GEQRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ geqrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -139,9 +139,9 @@ GEQRF_LAUNCHER(std::complex, cusolverDnZgeqrf) #undef GEQRF_LAUNCHER template -void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -152,17 +152,17 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = 
sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, scratch_, ipiv32_, devInfo_); @@ -170,7 +170,7 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -181,8 +181,8 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, } #define GETRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ getrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -196,8 +196,8 @@ GETRF_LAUNCHER(std::complex, cusolverDnZgetrf) #undef GETRF_LAUNCHER #define GETRI_LAUNCHER(TYPE) \ - void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ return getri_batch(queue, n, a, lda, lda * n, ipiv, n, 1, scratchpad, scratchpad_size); \ } @@ -211,10 +211,10 @@ GETRI_LAUNCHER(std::complex) // cusolverDnXgetrs does not use scratchpad memory template -inline void getrs(const char *func_name, Func func, sycl::queue &queue, +inline void getrs(const char* 
func_name, Func func, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb); @@ -225,7 +225,7 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = ipiv.size(); sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -233,15 +233,15 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb, nullptr); @@ -250,10 +250,10 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - 
std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ getrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -266,30 +266,30 @@ GETRS_LAUNCHER(std::complex, cusolverDnZgetrs) #undef GETRS_LAUNCHER template -inline void gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, m, lda, ldu, ldvt, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto s_acc = s.template get_access(cgh); auto u_acc = u.template get_access(cgh); auto vt_acc = vt.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto s_ = sc.get_mem(s_acc); - auto u_ = sc.get_mem(u_acc); - auto vt_ = sc.get_mem(vt_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto s_ = sc.get_mem(s_acc); + auto u_ = sc.get_mem(u_acc); + auto vt_ = sc.get_mem(vt_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; // rwork is set to nullptr. If set it is filled with information from the superdiagonal. cusolver_native_named_func(func_name, func, err, handle, get_cusolver_jobsvd(jobu), @@ -301,10 +301,10 @@ inline void gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define GESVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, \ - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, \ + void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, \ + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ gesvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, \ vt, ldvt, scratchpad, scratchpad_size); \ @@ -318,25 +318,25 @@ GESVD_LAUNCHER(std::complex, double, cusolverDnZgesvd) #undef GESVD_LAUNCHER template -inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +inline void heevd(const char* func_name, Func func, sycl::queue& queue, 
oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -347,9 +347,9 @@ inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HEEVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ scratchpad_size); \ } @@ -360,28 
+360,28 @@ HEEVD_LAUNCHER(std::complex, double, cusolverDnZheevd) #undef HEEVD_LAUNCHER template -inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, +inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -392,10 +392,10 @@ inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define HEGVD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - 
void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hegvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ scratchpad, scratchpad_size); \ } @@ -406,29 +406,29 @@ HEGVD_LAUNCHER(std::complex, double, cusolverDnZhegvd) #undef HEGVD_LAUNCHER template -inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -438,10 +438,10 @@ inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HETRD_LAUNCHER(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, \ - sycl::buffer &e, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, \ + sycl::buffer& e, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hetrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ scratchpad_size); \ } @@ -451,34 +451,34 @@ HETRD_LAUNCHER(std::complex, double, cusolverDnZhetrd) #undef HETRD_LAUNCHER -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - 
sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } template -inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -487,9 +487,9 @@ inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, 
sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -500,20 +500,20 @@ ORGBR_LAUNCHER(double, cusolverDnDorgbr) #undef ORGBR_LAUNCHER template -inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -522,9 +522,9 @@ inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define 
ORGQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -535,20 +535,20 @@ ORGQR_LAUNCHER(double, cusolverDnDorgqr) #undef ORGQR_LAUNCHER template -inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, 
nullptr); @@ -557,8 +557,8 @@ inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -570,24 +570,24 @@ ORGTR_LAUNCHER(double, cusolverDnDorgtr) #undef ORGTR_LAUNCHER template -inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = 
sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -598,10 +598,10 @@ inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ormtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ @@ -612,37 +612,37 @@ ORMTR_LAUNCHER(double, cusolverDnDormtr) #undef ORMTR_LAUNCHER -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, 
std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } template -inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, 
c_, ldc, @@ -652,10 +652,10 @@ inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -666,21 +666,21 @@ ORMQR_LAUNCHER(double, cusolverDnDormqr) #undef ORMQR_LAUNCHER template -inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto 
scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -690,8 +690,8 @@ inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size); \ } @@ -704,21 +704,21 @@ POTRF_LAUNCHER(std::complex, cusolverDnZpotrf) #undef POTRF_LAUNCHER template -inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = 
sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -728,8 +728,8 @@ inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRI_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size); \ } @@ -743,19 +743,19 @@ POTRI_LAUNCHER(std::complex, cusolverDnZpotri) // cusolverDnXpotrs does not use scratchpad memory template -inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { 
auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb, nullptr); @@ -764,9 +764,9 @@ inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRS_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -779,23 +779,23 @@ POTRS_LAUNCHER(std::complex, cusolverDnZpotrs) #undef POTRS_LAUNCHER template -inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = 
scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -806,9 +806,9 @@ inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYEVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ scratchpad_size); \ } @@ -819,26 +819,26 @@ SYEVD_LAUNCHER(double, cusolverDnDsyevd) #undef SYEVD_LAUNCHER template -inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, 
std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -849,10 +849,10 @@ inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define SYGVD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ sygvd(#CUSOLVER_ROUTINE, 
CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ scratchpad, scratchpad_size); \ } @@ -863,28 +863,28 @@ SYGVD_LAUNCHER(double, cusolverDnDsygvd) #undef SYGVD_LAUNCH template -inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -894,9 +894,9 @@ inline void 
sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRD_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tau, sycl::buffer &scratchpad, \ + void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ scratchpad_size); \ @@ -908,9 +908,9 @@ SYTRD_LAUNCHER(double, cusolverDnDsytrd) #undef SYTRD_LAUNCHER template -inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); @@ -922,17 +922,17 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: std::uint64_t ipiv_size = n; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = 
sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, ipiv32_, scratch_, scratchpad_size, devInfo_); @@ -940,7 +940,7 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -951,8 +951,8 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRF_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -965,49 +965,49 @@ SYTRF_LAUNCHER(std::complex, cusolverDnZsytrf) #undef SYTRF_LAUNCHER -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } template -inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, 
sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1016,9 +1016,9 @@ inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGBR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1029,20 +1029,20 
@@ UNGBR_LAUNCHER(std::complex, cusolverDnZungbr) #undef UNGBR_LAUNCHER template -inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1051,9 +1051,9 @@ inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define UNGQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ 
ungqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1064,20 +1064,20 @@ UNGQR_LAUNCHER(std::complex, cusolverDnZungqr) #undef UNGQR_LAUNCHER template -inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1086,8 +1086,8 @@ inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, 
sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -1098,39 +1098,39 @@ UNGTR_LAUNCHER(std::complex, cusolverDnZungtr) #undef UNGTR_LAUNCHER -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } template -inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -1140,10 +1140,10 @@ inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMQR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -1154,24 +1154,24 @@ UNMQR_LAUNCHER(std::complex, 
cusolverDnZunmqr) #undef UNMQR_LAUNCHER template -inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); + auto scratch_ = sc.get_mem(scratch_acc); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -1182,10 +1182,10 @@ inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMTR_LAUNCHER(TYPE, CUSOLVER_ROUTINE) \ - void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ oneapi::mkl::transpose trans, std::int64_t m, 
std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ unmtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ @@ -1199,10 +1199,10 @@ UNMTR_LAUNCHER(std::complex, cusolverDnZunmtr) // USM APIs template -inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *d, T_B *e, T_A *tauq, - T_A *taup, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* d, T_B* e, T_A* tauq, + T_A* taup, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -1210,19 +1210,19 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s if (m < n) throw unimplemented("lapack", "gebrd", "cusolver gebrd does not support m < n"); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tauq_ = reinterpret_cast(tauq); - auto taup_ = reinterpret_cast(taup); 
- auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tauq_ = reinterpret_cast(tauq); + auto taup_ = reinterpret_cast(taup); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_, scratch_, scratchpad_size, nullptr); @@ -1232,10 +1232,10 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s } #define GEBRD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tauq, TYPE_A *taup, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tauq, TYPE_A* taup, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gebrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1247,44 +1247,44 @@ GEBRD_LAUNCHER_USM(std::complex, double, cusolverDnZgebrd) #undef GEBRD_LAUNCHER_USM -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event 
gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } template -inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, +inline sycl::event geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1294,9 +1294,9 @@ inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, s } #define GEQRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return geqrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1309,10 +1309,10 @@ GEQRF_LAUNCHER_USM(std::complex, cusolverDnZgeqrf) #undef GEQRF_LAUNCHER_USM template -inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, +inline sycl::event getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -1320,20 +1320,20 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s // To get around 
the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = std::min(n, m); - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, scratch_, ipiv_, devInfo_); @@ -1341,7 +1341,7 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -1358,10 +1358,10 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s } #define GETRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ + sycl::event getrf(sycl::queue& queue, 
std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1374,9 +1374,9 @@ GETRF_LAUNCHER_USM(std::complex, cusolverDnZgetrf) #undef GETRF_LAUNCHER_USM #define GETRI_LAUNCHER_USM(TYPE) \ - sycl::event getri(sycl::queue &queue, std::int64_t n, TYPE *a, std::int64_t lda, \ - std::int64_t *ipiv, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event getri(sycl::queue& queue, std::int64_t n, TYPE* a, std::int64_t lda, \ + std::int64_t* ipiv, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getri_batch(queue, n, a, lda, lda * n, ipiv, n, 1, scratchpad, scratchpad_size, \ dependencies); \ } @@ -1390,11 +1390,11 @@ GETRI_LAUNCHER_USM(std::complex) // cusolverDnXgetrs does not use scratchpad memory template -inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t *ipiv, T *b, std::int64_t ldb, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); @@ -1402,25 +1402,25 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, // To get around the limitation. // Create new buffer and convert 64-bit values. 
std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_casting); - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb, nullptr); @@ -1435,10 +1435,10 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t *ipiv, TYPE *b, \ - std::int64_t ldb, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \ + std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, 
trans, n, nrhs, a, lda, ipiv, b, \ ldb, scratchpad, scratchpad_size, dependencies); \ } @@ -1451,28 +1451,28 @@ GETRS_LAUNCHER_USM(std::complex, cusolverDnZgetrs) #undef GETRS_LAUNCHER_USM template -inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *s, T_A *u, std::int64_t ldu, - T_A *vt, std::int64_t ldvt, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu, + T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldu, ldvt, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto s_ = reinterpret_cast(s); - auto u_ = reinterpret_cast(u); - auto vt_ = reinterpret_cast(vt); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto s_ = reinterpret_cast(s); + auto u_ = reinterpret_cast(u); + auto vt_ = reinterpret_cast(vt); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; // 
rwork is set to nullptr. If set it is filled with information from the superdiagonal. cusolver_native_named_func(func_name, func, err, handle, get_cusolver_jobsvd(jobu), @@ -1486,11 +1486,11 @@ inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, } #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *s, \ - TYPE_A *u, std::int64_t ldu, TYPE_A *vt, std::int64_t ldvt, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s, \ + TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gesvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, \ ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); \ } @@ -1503,25 +1503,25 @@ GESVD_LAUNCHER_USM(std::complex, double, cusolverDnZgesvd) #undef GESVD_LAUNCHER_USM template -inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_B *&w, T_A *&scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int 
*)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1534,10 +1534,10 @@ inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, } #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ - std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ + std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return heevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1548,27 +1548,27 @@ HEEVD_LAUNCHER_USM(std::complex, double, cusolverDnZheevd) #undef HEEVD_LAUNCHER_USM template -inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, 
oneapi::mkl::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_A *&b, std::int64_t ldb, T_B *&w, T_A *&scratchpad, +inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -1581,11 +1581,11 @@ inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, s } #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo 
uplo, std::int64_t n, TYPE_A *a, std::int64_t lda, \ - TYPE_A *b, std::int64_t ldb, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda, \ + TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hegvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \ ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -1596,27 +1596,27 @@ HEGVD_LAUNCHER_USM(std::complex, double, cusolverDnZhegvd) #undef HEGVD_LAUNCHER_USM template -inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T_A *a, std::int64_t lda, T_B *d, - T_B *e, T_A *tau, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d, + T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType_A = typename CudaEquivalentType::Type; using cuDataType_B = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto 
e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -1628,10 +1628,10 @@ inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, } #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, CUSOLVER_ROUTINE) \ - sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tau, TYPE_A *scratchpad, \ + sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hetrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1641,37 +1641,37 @@ HETRD_LAUNCHER_USM(std::complex, double, cusolverDnZhetrd) #undef HETRD_LAUNCHER_USM -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t 
n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } template -inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1681,10 +1681,10 @@ inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, } #define ORGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgbr(sycl::queue &queue, 
oneapi::mkl::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1695,22 +1695,22 @@ ORGBR_LAUNCHER_USM(double, cusolverDnDorgbr) #undef ORGBR_LAUNCHER_USM template -inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; 
cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1720,9 +1720,9 @@ inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, s } #define ORGQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1733,22 +1733,22 @@ ORGQR_LAUNCHER_USM(double, cusolverDnDorgqr) #undef ORGQR_LAUNCHER_USM template -inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = 
reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -1758,9 +1758,9 @@ inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, } #define ORGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1771,25 +1771,25 @@ ORGTR_LAUNCHER_USM(double, cusolverDnDorgtr) #undef ORGTR_LAUNCHER_USM template -inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); 
for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -1801,11 +1801,11 @@ inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, } #define ORMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -1815,38 +1815,38 @@ ORMTR_LAUNCHER_USM(double, cusolverDnDormtr) #undef ORMTR_LAUNCHER_USM -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } template -inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, 
[=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -1857,11 +1857,11 @@ inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, } #define ORMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -1872,23 +1872,23 @@ ORMQR_LAUNCHER_USM(double, cusolverDnDormqr) #undef ORMQR_LAUNCHER_USM template -inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -1900,9 +1900,9 @@ inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, } #define POTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1915,23 +1915,23 @@ POTRF_LAUNCHER_USM(std::complex, cusolverDnZpotrf) #undef POTRF_LAUNCHER_USM template -inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, - 
oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, scratch_, scratchpad_size, devInfo_); @@ -1943,9 +1943,9 @@ inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, } #define POTRI_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return 
potri(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1959,22 +1959,22 @@ POTRI_LAUNCHER_USM(std::complex, cusolverDnZpotri) // cusolverDnXpotrs does not use scratchpad memory template -inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, +inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb, nullptr); @@ -1984,10 +1984,10 @@ inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, } #define POTRS_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, TYPE *b, std::int64_t ldb, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event 
potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2000,24 +2000,24 @@ POTRS_LAUNCHER_USM(std::complex, cusolverDnZpotrs) #undef POTRS_LAUNCHER_USM template -inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *w, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; 
cusolver_native_named_func(func_name, func, err, handle, get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -2030,10 +2030,10 @@ inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, } #define SYEVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ - std::int64_t n, TYPE *a, std::int64_t lda, TYPE *w, TYPE *scratchpad, \ + sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ + std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syevd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2044,26 +2044,26 @@ SYEVD_LAUNCHER_USM(double, cusolverDnDsyevd) #undef SYEVD_LAUNCHER_USM template -inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *w, T *scratchpad, +inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - 
onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cusolver_itype(itype), get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, a_, @@ -2076,10 +2076,10 @@ inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, s } #define SYGVD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, std::int64_t lda, TYPE *b, \ - std::int64_t ldb, TYPE *w, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \ + std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return sygvd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, \ ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -2090,26 +2090,26 @@ SYGVD_LAUNCHER_USM(double, cusolverDnDsygvd) #undef SYGVD_LAUNCHER_USM template -inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *d, T *e, - T *tau, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event 
sytrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, T* e, + T* tau, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, d_, e_, tau_, scratch_, scratchpad_size, devInfo_); @@ -2121,10 +2121,10 @@ inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, } #define SYTRD_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *d, TYPE *e, TYPE *tau, TYPE *scratchpad, \ + sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad, \ std::int64_t 
scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return sytrd(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2135,31 +2135,31 @@ SYTRD_LAUNCHER_USM(double, cusolverDnDsytrd) #undef SYTRD_LAUNCHER_USM template -inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); + int* devInfo = (int*)malloc_device(sizeof(int), queue); // cuSolver legacy api does not accept 64-bit ints. // To get around the limitation. 
// Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto ipiv_ = reinterpret_cast(ipiv32); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto ipiv_ = reinterpret_cast(ipiv32); + auto devInfo_ = reinterpret_cast(devInfo); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, ipiv_, scratch_, scratchpad_size, devInfo_); @@ -2167,7 +2167,7 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -2184,10 +2184,10 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, } #define SYTRF_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ + sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* 
scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return sytrf(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2199,51 +2199,51 @@ SYTRF_LAUNCHER_USM(std::complex, cusolverDnZsytrf) #undef SYTRF_LAUNCHER_USM -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event 
trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } template -inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = 
reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2253,10 +2253,10 @@ inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, } #define UNGBR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungbr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2267,22 +2267,22 @@ UNGBR_LAUNCHER_USM(std::complex, cusolverDnZungbr) #undef UNGBR_LAUNCHER_USM template -inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { 
cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2292,9 +2292,9 @@ inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, s } #define UNGQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -2305,22 +2305,22 @@ UNGQR_LAUNCHER_USM(std::complex, cusolverDnZungqr) #undef UNGQR_LAUNCHER_USM template -inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, 
scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, a_, lda, tau_, scratch_, scratchpad_size, nullptr); @@ -2330,9 +2330,9 @@ inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, } #define UNGTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -2342,40 +2342,40 @@ UNGTR_LAUNCHER_USM(std::complex, cusolverDnZungtr) #undef UNGTR_LAUNCHER_USM -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event 
unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } template -inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, 
queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_operation(trans), m, n, k, a_, lda, tau_, c_, ldc, @@ -2386,11 +2386,11 @@ inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, } #define UNMQR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return unmqr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2401,25 +2401,25 @@ UNMQR_LAUNCHER_USM(std::complex, cusolverDnZunmqr) #undef UNMQR_LAUNCHER_USM template -inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using cuDataType = typename CudaEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto scratch_ = reinterpret_cast(scratchpad); cusolverStatus_t err; cusolver_native_named_func(func_name, func, err, handle, get_cublas_side_mode(side), get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, @@ -2431,11 +2431,11 @@ inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, } #define UNMTR_LAUNCHER_USM(TYPE, CUSOLVER_ROUTINE) \ - sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return 
unmtr(#CUSOLVER_ROUTINE, CUSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2448,16 +2448,18 @@ UNMTR_LAUNCHER_USM(std::complex, cusolverDnZunmtr) // SCRATCHPAD APIs template -inline void gebrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void gebrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); + }); + }) + .wait(); } #define GEBRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2478,37 +2480,40 @@ GEBRD_LAUNCHER_SCRATCH(std::complex, cusolverDnZgebrd_bufferSize) #undef GEBRD_LAUNCHER_SCRATCH template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { 
throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template -inline void geqrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void geqrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, + scratch_size); + }); + }) + .wait(); } #define GEQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2529,17 +2534,19 @@ GEQRF_LAUNCHER_SCRATCH(std::complex, cusolverDnZgeqrf_bufferSize) #undef GEQRF_LAUNCHER_SCRATCH template -inline void gesvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void gesvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, - std::int64_t ldu, std::int64_t ldvt, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); - }); - 
}).wait(); + std::int64_t ldu, std::int64_t ldvt, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, scratch_size); + }); + }) + .wait(); } #define GESVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2561,16 +2568,19 @@ GESVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZgesvd_bufferSize) #undef GESVD_LAUNCHER_SCRATCH template -inline void getrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void getrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, nullptr, lda, + scratch_size); + }); + }) + .wait(); } #define GETRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2621,18 +2631,20 @@ GETRS_LAUNCHER_SCRATCH(std::complex) #undef GETRS_LAUNCHER_SCRATCH template -inline void heevd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void heevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = 
sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), - get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), + get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, + scratch_size); + }); + }) + .wait(); } #define HEEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2652,19 +2664,21 @@ HEEVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZheevd_bufferSize) #undef HEEVD_LAUNCHER_SCRATCH template -inline void hegvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void hegvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), - get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, nullptr, - lda, nullptr, ldb, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), + get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, + nullptr, lda, nullptr, ldb, nullptr, scratch_size); + }); + }) + .wait(); } #define HEGVD_LAUNCHER_SCRATCH(TYPE, 
CUSOLVER_ROUTINE) \ @@ -2684,17 +2698,20 @@ HEGVD_LAUNCHER_SCRATCH(std::complex, cusolverDnZhegvd_bufferSize) #undef HEGVD_LAUNCHER_SCRATCH template -inline void hetrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void hetrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, nullptr, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, nullptr, nullptr, + scratch_size); + }); + }) + .wait(); } #define HETRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2713,28 +2730,30 @@ HETRD_LAUNCHER_SCRATCH(std::complex, cusolverDnZhetrd_bufferSize) #undef HETRD_LAUNCHER_SCRATCH template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } template -inline void orgbr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgbr_scratchpad_size(const char* 
func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); + std::int64_t k, std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), + m, n, k, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); } #define ORGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2754,17 +2773,19 @@ ORGBR_LAUNCHER_SCRATCH(double, cusolverDnDorgbr_bufferSize) #undef ORGBR_LAUNCHER_SCRATCH template -inline void orgtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, 
scratch_size); + }); + }) + .wait(); } #define ORGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2783,17 +2804,19 @@ ORGTR_LAUNCHER_SCRATCH(double, cusolverDnDorgtr_bufferSize) #undef ORGTR_LAUNCHER_SCRATCH template -inline void orgqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void orgqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, + nullptr, scratch_size); + }); + }) + .wait(); } #define ORGQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2812,14 +2835,14 @@ ORGQR_LAUNCHER_SCRATCH(double, cusolverDnDorgqr_bufferSize) #undef ORGQR_LAUNCHER_SCRATCH template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "ormrq_scratchpad_size"); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2827,19 +2850,21 @@ std::int64_t 
ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side } template -inline void ormqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ormqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_operation(trans), m, n, k, nullptr, lda, nullptr, - nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_operation(trans), m, n, k, nullptr, lda, + nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define ORMQRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2859,19 +2884,21 @@ ORMQRF_LAUNCHER_SCRATCH(double, cusolverDnDormqr_bufferSize) #undef ORMQRF_LAUNCHER_SCRATCH template -inline void ormtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ormtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, 
get_cublas_side_mode(side), - get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, n, - nullptr, lda, nullptr, nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t lda, std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_fill_mode(uplo), get_cublas_operation(trans), + m, n, nullptr, lda, nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define ORMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2892,17 +2919,19 @@ ORMTR_LAUNCHER_SCRATCH(double, cusolverDnDormtr_bufferSize) #undef ORMTR_LAUNCHER_SCRATCH template -inline void potrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void potrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, scratch_size); + }); + }) + .wait(); } #define POTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2939,17 +2968,19 @@ POTRS_LAUNCHER_SCRATCH(std::complex) #undef POTRS_LAUNCHER_SCRATCH template -inline void potri_scratchpad_size(const char *func_name, Func func, sycl::queue 
&queue, +inline void potri_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, scratch_size); + }); + }) + .wait(); } #define POTRI_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -2970,16 +3001,19 @@ POTRI_LAUNCHER_SCRATCH(std::complex, cusolverDnZpotri_bufferSize) #undef POTRI_LAUNCHER_SCRATCH template -inline void sytrf_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void sytrf_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, nullptr, lda, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, n, nullptr, lda, + scratch_size); + }); + }) + .wait(); } #define SYTRF_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3000,18 +3034,20 @@ 
SYTRF_LAUNCHER_SCRATCH(std::complex, cusolverDnZsytrf_bufferSize) #undef SYTRF_LAUNCHER_SCRATCH template -inline void syevd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void syevd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), - get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_job(jobz), + get_cublas_fill_mode(uplo), n, nullptr, lda, nullptr, + scratch_size); + }); + }) + .wait(); } #define SYEVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3031,19 +3067,21 @@ SYEVD_LAUNCHER_SCRATCH(double, cusolverDnDsyevd_bufferSize) #undef SYEVD_LAUNCHER_SCRATCH template -inline void sygvd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void sygvd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), - get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, nullptr, - lda, 
nullptr, ldb, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cusolver_itype(itype), + get_cusolver_job(jobz), get_cublas_fill_mode(uplo), n, + nullptr, lda, nullptr, ldb, nullptr, scratch_size); + }); + }) + .wait(); } #define SYGVD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3063,17 +3101,20 @@ SYGVD_LAUNCHER_SCRATCH(double, cusolverDnDsygvd_bufferSize) #undef SYGVD_LAUNCHER_SCRATCH template -inline void sytrd_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void sytrd_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, nullptr, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, nullptr, nullptr, + scratch_size); + }); + }) + .wait(); } #define SYTRD_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3092,21 +3133,21 @@ SYTRD_LAUNCHER_SCRATCH(double, cusolverDnDsytrd_bufferSize) #undef SYTRD_LAUNCHER_SCRATCH template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo 
uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -3114,7 +3155,7 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "trtrs_scratchpad_size"); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -3123,17 +3164,19 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, one } template -inline void ungbr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungbr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::int64_t lda, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), m, n, k, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); + std::int64_t k, 
std::int64_t lda, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_generate(vec), + m, n, k, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); } #define UNGBR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3153,17 +3196,19 @@ UNGBR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungbr_bufferSize) #undef UNGBR_LAUNCHER_SCRATCH template -inline void ungqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, nullptr, - scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, m, n, k, nullptr, lda, + nullptr, scratch_size); + }); + }) + .wait(); } #define UNGQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3182,17 +3227,19 @@ UNGQR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungqr_bufferSize) #undef UNGQR_LAUNCHER_SCRATCH template -inline void ungtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void ungtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, - int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - 
onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), n, - nullptr, lda, nullptr, scratch_size); - }); - }).wait(); + int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_fill_mode(uplo), + n, nullptr, lda, nullptr, scratch_size); + }); + }) + .wait(); } #define UNGTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3211,7 +3258,7 @@ UNGTR_LAUNCHER_SCRATCH(std::complex, cusolverDnZungtr_bufferSize) #undef UNGTR_LAUNCHER_SCRATCH template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -3219,7 +3266,7 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "unmrq_scratchpad_size"); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -3228,19 +3275,21 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, one } template -inline void unmqr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void unmqr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, - std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - 
onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_operation(trans), m, n, k, nullptr, lda, nullptr, - nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), + get_cublas_operation(trans), m, n, k, nullptr, lda, + nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define UNMQR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ @@ -3260,19 +3309,21 @@ UNMQR_LAUNCHER_SCRATCH(std::complex, cusolverDnZunmqr_bufferSize) #undef UNMQR_LAUNCHER_SCRATCH template -inline void unmtr_scratchpad_size(const char *func_name, Func func, sycl::queue &queue, +inline void unmtr_scratchpad_size(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t lda, std::int64_t ldc, int *scratch_size) { - queue.submit([&](sycl::handler &cgh) { - onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler &sc) { - auto handle = sc.get_handle(queue); - cusolverStatus_t err; - CUSOLVER_ERROR_FUNC_T_SYNC(func_name, func, err, handle, get_cublas_side_mode(side), - get_cublas_fill_mode(uplo), get_cublas_operation(trans), m, n, - nullptr, lda, nullptr, nullptr, ldc, scratch_size); - }); - }).wait(); + std::int64_t lda, std::int64_t ldc, int* scratch_size) { + queue + .submit([&](sycl::handler& cgh) { + onemkl_cusolver_host_task(cgh, queue, [=](CusolverScopedContextHandler& sc) { + auto handle = sc.get_handle(queue); + cusolverStatus_t err; + CUSOLVER_ERROR_FUNC_T_SYNC(func_name, 
func, err, handle, get_cublas_side_mode(side), + get_cublas_fill_mode(uplo), get_cublas_operation(trans), + m, n, nullptr, lda, nullptr, nullptr, ldc, scratch_size); + }); + }) + .wait(); } #define UNMTR_LAUNCHER_SCRATCH(TYPE, CUSOLVER_ROUTINE) \ diff --git a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp index f381336d6..edd731978 100644 --- a/src/lapack/backends/cusolver/cusolver_scope_handle.cpp +++ b/src/lapack/backends/cusolver/cusolver_scope_handle.cpp @@ -44,7 +44,7 @@ thread_local cusolver_handle CusolverScopedContextHandler::handle_he #endif CusolverScopedContextHandler::CusolverScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : ih(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -74,8 +74,8 @@ CusolverScopedContextHandler::~CusolverScopedContextHandler() noexcept(false) { delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -93,7 +93,7 @@ void ContextCallback(void *userData) { } } -cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue &queue) { +cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue& queue) { auto cudaDevice = ih.get_native_device(); CUresult cuErr; CUcontext desired; @@ -140,10 +140,10 @@ cusolverDnHandle_t CusolverScopedContextHandler::get_handle(const sycl::queue &q return handle; } -CUstream CusolverScopedContextHandler::get_stream(const sycl::queue &queue) { +CUstream CusolverScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context CusolverScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context CusolverScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git 
a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp index 1bd916f71..34026bf78 100644 --- a/src/lapack/backends/cusolver/cusolver_scope_handle.hpp +++ b/src/lapack/backends/cusolver/cusolver_scope_handle.hpp @@ -89,19 +89,19 @@ cuSolver handle to the SYCL context. class CusolverScopedContextHandler { CUcontext original_; - sycl::context *placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMKL_PI_INTERFACE_REMOVED static thread_local cusolver_handle handle_helper; #else static thread_local cusolver_handle handle_helper; #endif - CUstream get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + CUstream get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - CusolverScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + CusolverScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~CusolverScopedContextHandler() noexcept(false); /** @@ -111,7 +111,7 @@ class CusolverScopedContextHandler { * @param queue sycl queue. * @return cusolverDnHandle_t a handle to construct cusolver routines */ - cusolverDnHandle_t get_handle(const sycl::queue &queue); + cusolverDnHandle_t get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. 
template @@ -120,7 +120,7 @@ class CusolverScopedContextHandler { return reinterpret_cast(cudaPtr); } - void wait_stream(const sycl::queue &queue) { + void wait_stream(const sycl::queue& queue) { cuStreamSynchronize(get_stream(queue)); } }; diff --git a/src/lapack/backends/cusolver/cusolver_task.hpp b/src/lapack/backends/cusolver/cusolver_task.hpp index 96107f959..99e51d8ac 100644 --- a/src/lapack/backends/cusolver/cusolver_task.hpp +++ b/src/lapack/backends/cusolver/cusolver_task.hpp @@ -49,9 +49,9 @@ namespace lapack { namespace cusolver { template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](sycl::interop_handle ih) { #endif @@ -61,7 +61,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } template -static inline void onemkl_cusolver_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemkl_cusolver_host_task(H& cgh, sycl::queue queue, F f) { (void)host_task_internal(cgh, queue, f); } diff --git a/src/lapack/backends/mkl_common/mkl_lapack.cxx b/src/lapack/backends/mkl_common/mkl_lapack.cxx index 8573bffd9..055531f7c 100644 --- a/src/lapack/backends/mkl_common/mkl_lapack.cxx +++ b/src/lapack/backends/mkl_common/mkl_lapack.cxx @@ -17,1861 +17,1861 @@ * SPDX-License-Identifier: Apache-2.0 *******************************************************************************/ -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, sycl::buffer> &taup, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + 
std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer 
&tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, 
tau, scratchpad, scratchpad_size); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& 
ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, 
+ sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void 
getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gesvd(sycl::queue& queue, 
oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, 
sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, 
std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void 
orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, 
sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(sycl::queue &queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + 
std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, 
std::int64_t scratchpad_size) { +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> 
&scratchpad, std::int64_t scratchpad_size) { +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void sygvd(sycl::queue &queue, std::int64_t 
itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, 
sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void trtrs(sycl::queue 
&queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, 
oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungqr(queue, m, n, 
k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, 
tau, c, ldc, scratchpad, scratchpad_size); } -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, 
scratchpad, scratchpad_size); } -void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, 
std::int64_t lda, - double *d, double *e, double *tauq, double *taup, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tauq, float *taup, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gebrd(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, 
std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gerqf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex 
*tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } 
-sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t 
scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, 
+ std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + 
std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies) { +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - float *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + float* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return 
::oneapi::mkl::lapack::gesvd(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::heevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, 
uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::hegvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::hetrd(queue, uplo, n, a, lda, d, e, tau, 
scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::hetrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t 
scratchpad_size, const std::vector &dependencies) { +sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, 
scratchpad_size, dependencies); } -sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormtr(queue, side, uplo, trans, 
m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, 
std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ormqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size, 
dependencies); } -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potri(queue, 
uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potri(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrs(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* 
w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::syevd(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies) { return ::oneapi::mkl::lapack::sygvd(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrd(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double 
*scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::sytrf(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, 
std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, 
oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::trtrs(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungbr(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungqr(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungtr(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmrq(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmqr(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, 
lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return ::oneapi::mkl::lapack::unmtr(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, 
batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, 
sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, sycl::buffer &b, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, 
+ sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, 
stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, 
sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, 
std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> 
&scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event 
geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t 
group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t 
*n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::geqrf_batch(queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, 
scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) 
{ return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t 
group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrf_batch(queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, 
- std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, 
std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getri_batch(queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose 
trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, 
std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { 
return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, 
std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::getrs_batch(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::orgqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, stride_a, batch_size, 
scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, 
scratchpad_size, dependencies); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, 
std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - 
std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return ::oneapi::mkl::lapack::potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - 
std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { 
return ::oneapi::mkl::lapack::ungqr_batch(queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } template <> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gebrd_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t gebrd_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gebrd_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gebrd_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gerqf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { 
return ::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::gerqf_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::geqrf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::geqrf_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { return ::oneapi::mkl::lapack::gesvd_scratchpad_size(queue, jobu, jobvt, m, n, lda, ldu, ldvt); } template <> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd 
jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { @@ -1879,7 +1879,7 @@ std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobs ldvt); } template <> -std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, +std::int64_t gesvd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -1888,7 +1888,7 @@ std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, n, lda, ldu, ldvt); } template <> -std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, +std::int64_t gesvd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -1897,57 +1897,57 @@ std::int64_t gesvd_scratchpad_size>(sycl::queue &queue, n, lda, ldu, ldvt); } template <> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getrf_scratchpad_size(queue, m, n, lda); } template <> -std::int64_t getrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda); } template <> -std::int64_t getrf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getrf_scratchpad_size>(queue, m, n, lda); } template <> 
-std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda); } template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getri_scratchpad_size(queue, n, lda); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::getri_scratchpad_size>(queue, n, lda); } template <> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::getrs_scratchpad_size(queue, trans, n, nrhs, lda, ldb); } template <> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::getrs_scratchpad_size(queue, trans, n, nrhs, lda, ldb); } template <> -std::int64_t getrs_scratchpad_size>(sycl::queue &queue, +std::int64_t getrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -1955,7 +1955,7 @@ std::int64_t 
getrs_scratchpad_size>(sycl::queue &queue, lda, ldb); } template <> -std::int64_t getrs_scratchpad_size>(sycl::queue &queue, +std::int64_t getrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -1963,21 +1963,21 @@ std::int64_t getrs_scratchpad_size>(sycl::queue &queue, lda, ldb); } template <> -std::int64_t heevd_scratchpad_size>(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::heevd_scratchpad_size>(queue, jobz, uplo, n, lda); } template <> -std::int64_t heevd_scratchpad_size>(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t heevd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::heevd_scratchpad_size>(queue, jobz, uplo, n, lda); } template <> -std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std::int64_t itype, +std::int64_t hegvd_scratchpad_size>(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -1985,7 +1985,7 @@ std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std: uplo, n, lda, ldb); } template <> -std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std::int64_t itype, +std::int64_t hegvd_scratchpad_size>(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -1993,59 +1993,59 @@ std::int64_t hegvd_scratchpad_size>(sycl::queue &queue, std uplo, n, lda, ldb); } template <> -std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return 
::oneapi::mkl::lapack::hetrd_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t hetrd_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrd_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::hetrd_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::hetrf_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::hetrf_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::orgbr_scratchpad_size(queue, vect, m, n, k, lda); } template <> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::orgbr_scratchpad_size(queue, vect, m, n, k, lda); } template <> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::orgtr_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t 
orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::orgtr_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda); } template <> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::orgqr_scratchpad_size(queue, m, n, k, lda); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2053,7 +2053,7 @@ std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2061,7 +2061,7 @@ std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2069,7 +2069,7 @@ std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t 
ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2077,7 +2077,7 @@ std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2085,7 +2085,7 @@ std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2093,129 +2093,129 @@ std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side ldc); } template <> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potrf_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potrf_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t potrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return 
::oneapi::mkl::lapack::potrf_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t potrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potrf_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::potrs_scratchpad_size(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::potrs_scratchpad_size(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::potrs_scratchpad_size>(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::potrs_scratchpad_size>(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potri_scratchpad_size(queue, uplo, n, lda); } template <> 
-std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potri_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t potri_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potri_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t potri_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potri_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::potri_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrf_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrf_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrf_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrf_scratchpad_size>(queue, uplo, n, lda); } template <> 
-std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::syevd_scratchpad_size(queue, jobz, uplo, n, lda); } template <> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::syevd_scratchpad_size(queue, jobz, uplo, n, lda); } template <> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::sygvd_scratchpad_size(queue, itype, jobz, uplo, n, lda, ldb); } template <> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { return ::oneapi::mkl::lapack::sygvd_scratchpad_size(queue, itype, jobz, uplo, n, lda, ldb); } template <> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrd_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::sytrd_scratchpad_size(queue, uplo, n, lda); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, 
+std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2223,7 +2223,7 @@ std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2231,7 +2231,7 @@ std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -2240,7 +2240,7 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, onea queue, uplo, trans, diag, n, nrhs, lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -2249,7 +2249,7 @@ std::int64_t trtrs_scratchpad_size>(sycl::queue &queue, one queue, uplo, trans, diag, n, nrhs, lda, ldb); } template <> -std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, +std::int64_t ungbr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { @@ -2257,7 +2257,7 @@ std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, lda); } template <> -std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, +std::int64_t ungbr_scratchpad_size>(sycl::queue& queue, 
oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { @@ -2265,29 +2265,29 @@ std::int64_t ungbr_scratchpad_size>(sycl::queue &queue, lda); } template <> -std::int64_t ungqr_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda); } template <> -std::int64_t ungqr_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t ungqr_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return ::oneapi::mkl::lapack::ungqr_scratchpad_size>(queue, m, n, k, lda); } template <> -std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::ungtr_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t ungtr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t ungtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return ::oneapi::mkl::lapack::ungtr_scratchpad_size>(queue, uplo, n, lda); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2296,7 +2296,7 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, onea n, k, lda, ldc); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2305,7 +2305,7 @@ 
std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, one n, k, lda, ldc); } template <> -std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmqr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2314,7 +2314,7 @@ std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, onea n, k, lda, ldc); } template <> -std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmqr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2323,7 +2323,7 @@ std::int64_t unmqr_scratchpad_size>(sycl::queue &queue, one n, k, lda, ldc); } template <> -std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -2332,7 +2332,7 @@ std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, onea trans, m, n, lda, ldc); } template <> -std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmtr_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -2341,14 +2341,14 @@ std::int64_t unmtr_scratchpad_size>(sycl::queue &queue, one queue, side, uplo, trans, m, n, lda, ldc); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> 
-std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2356,7 +2356,7 @@ std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_ stride_ipiv, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2365,7 +2365,7 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2374,14 +2374,14 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queu queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return ::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2389,7 +2389,7 @@ std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_ stride_ipiv, batch_size); } template <> -std::int64_t 
getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2398,7 +2398,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2407,7 +2407,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queu queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, @@ -2416,7 +2416,7 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl: queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, @@ -2426,7 +2426,7 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( @@ -2434,28 +2434,28 @@ std::int64_t getrs_batch_scratchpad_size>( } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -2464,7 +2464,7 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& 
queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -2473,21 +2473,21 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queu queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -2496,7 +2496,7 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -2505,7 +2505,7 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queu queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size) { @@ -2513,7 +2513,7 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl: queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, @@ -2523,20 +2523,20 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2544,7 +2544,7 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t stride_tau, batch_size); } template <> -std::int64_t 
orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2553,241 +2553,241 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_ } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* 
group_sizes) { return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size(queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrf_batch_scratchpad_size>( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getri_batch_scratchpad_size(queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t 
getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getri_batch_scratchpad_size>( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size(queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, 
std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::getrs_batch_scratchpad_size>( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size(queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + 
std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::geqrf_batch_scratchpad_size>( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::int64_t *lda, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::orgqr_batch_scratchpad_size(queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* 
group_sizes) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size(queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, - oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, - oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrf_batch_scratchpad_size>( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return 
::oneapi::mkl::lapack::potrs_batch_scratchpad_size(queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::potrs_batch_scratchpad_size>( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + 
std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return ::oneapi::mkl::lapack::ungqr_batch_scratchpad_size>( queue, m, n, k, lda, group_count, group_sizes); } diff --git a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp b/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp index 1932bb959..d0ba37e7a 100644 --- a/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp +++ b/src/lapack/backends/mkl_common/mkl_lapack_backend.hpp @@ -28,1235 +28,1235 @@ namespace oneapi { namespace mkl { namespace lapack { -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, sycl::buffer> &taup, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue 
&queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size); +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, 
sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); 
-void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, 
+ std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer &u, std::int64_t ldu, sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void 
hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void 
gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, 
sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, 
- std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue 
&queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormrq(sycl::queue& queue, 
oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); 
+void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &w, sycl::buffer &scratchpad, +void 
sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void 
sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, 
oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> 
&tau, sycl::buffer> &scratchpad, +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, 
std::int64_t scratchpad_size); -void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, +void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t 
stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - 
sycl::buffer &ipiv, std::int64_t stride_ipiv, sycl::buffer &b, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t 
nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& 
scratchpad, std::int64_t scratchpad_size); -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - 
sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t 
stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, float *d, float *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *d, double *e, double *tauq, double *taup, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *d, float *e, float *tauq, float *taup, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, double *d, double *e, std::complex *tauq, - std::complex *taup, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies = {}); -sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t *ipiv, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - 
std::int64_t nrhs, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, float *s, float *u, - std::int64_t ldu, float *vt, std::int64_t ldvt, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - float *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, std::complex *a, std::int64_t lda, - double *s, std::complex *u, std::int64_t ldu, std::complex *vt, - std::int64_t ldvt, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies = {}); -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, float *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, double *w, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, 
std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - float *a, std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - double *a, std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies = {}); -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *w, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *w, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue 
&queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& 
dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, 
std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
+sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + float* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + 
std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event 
orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event 
ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const 
std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + 
double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - 
std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + 
std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t 
ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t 
stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, 
std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t 
stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies = {}); +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event 
getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t 
lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t 
batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event getrs_batch(sycl::queue &queue, 
oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** 
a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, 
oneapi::mkl::uplo *uplo, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event 
potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, 
oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies = {}); +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - 
std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies = {}); + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); template = nullptr> -std::int64_t gebrd_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, 
std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t geqrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t gesvd_scratchpad_size(sycl::queue &queue, oneapi::mkl::jobsvd jobu, +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); template = nullptr> -std::int64_t getrf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda); +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t getrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t heevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, 
std::int64_t lda); template = nullptr> -std::int64_t hegvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t hetrd_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t hetrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t orgtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t orgqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t ormtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t potrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t potrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t potri_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sytrf_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t syevd_scratchpad_size(sycl::queue &queue, oneapi::mkl::job jobz, +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t sygvd_scratchpad_size(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t sytrd_scratchpad_size(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t trtrs_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); template = nullptr> -std::int64_t ungbr_scratchpad_size(sycl::queue &queue, oneapi::mkl::generate vect, std::int64_t m, +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungqr_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); template = nullptr> -std::int64_t ungtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); template = nullptr> -std::int64_t unmrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmqr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t unmtr_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); template = nullptr> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t 
*group_sizes); +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); template = nullptr> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes); +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); template = nullptr> -std::int64_t ungqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes); +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); } // namespace lapack } // namespace mkl diff --git a/src/lapack/backends/rocsolver/rocsolver_batch.cpp b/src/lapack/backends/rocsolver/rocsolver_batch.cpp index 
4363be306..2965faf51 100644 --- a/src/lapack/backends/rocsolver/rocsolver_batch.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_batch.cpp @@ -31,476 +31,476 @@ namespace rocsolver { // BATCH BUFFER API -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &tau, - std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - 
std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "geqrf_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getri_batch(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer 
&ipiv, +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, sycl::buffer &b, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - 
sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrs_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, +void getrf_batch(sycl::queue& queue, std::int64_t m, 
std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getrf_batch"); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", 
"orgqr_batch"); } -void orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "orgqr_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void 
potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrf_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void 
potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "potrs_batch"); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ungqr_batch"); } -void ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + 
std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ungqr_batch"); } // BATCH USM API -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, 
std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *tau, +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event 
geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event geqrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "geqrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, 
std::int64_t *m, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getrf_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrf_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getri_batch(sycl::queue &queue, std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, 
std::complex *scratchpad, +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getri_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, float *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, 
std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, float **a, std::int64_t *lda, std::int64_t **ipiv, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + 
std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, double **a, std::int64_t *lda, std::int64_t **ipiv, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event getrs_batch(sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, 
std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getrs_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, std::int64_t stride_a, float *tau, - std::int64_t stride_tau, std::int64_t batch_size, float *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, std::int64_t stride_a, double *tau, - std::int64_t stride_tau, std::int64_t batch_size, double *scratchpad, +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - float **a, std::int64_t *lda, float **tau, std::int64_t group_count, - std::int64_t 
*group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event orgqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - double **a, std::int64_t *lda, double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "orgqr_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, float *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, double *a, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", 
"potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } -sycl::event potrf_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event potrf_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { throw unimplemented("lapack", "potrf_batch"); } template -inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, T **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrf_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, T** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; int64_t batch_size = 0; @@ -509,27 +509,27 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu batch_size += group_sizes[i]; } - int *info = (int *)malloc_device(sizeof(int) * batch_size, queue); - T **a_dev = (T 
**)malloc_device(sizeof(T *) * batch_size, queue); + int* info = (int*)malloc_device(sizeof(int) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_cpy); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto *info_ = reinterpret_cast(info); - rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo[i]), - (int)n[i], a_ + offset, (int)lda[i], info_ + offset, - (int)group_sizes[i]); + auto** a_ = reinterpret_cast(a_dev); + auto* info_ = reinterpret_cast(info); + rocsolver_native_named_func(func_name, func, err, handle, + get_rocblas_fill_mode(uplo[i]), (int)n[i], a_ + offset, + (int)lda[i], info_ + offset, (int)group_sizes[i]); offset += group_sizes[i]; } }); @@ -540,9 +540,9 @@ inline sycl::event potrf_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRF_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ sycl::event potrf_batch( \ - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, TYPE **a, std::int64_t *lda, \ - std::int64_t group_count, std::int64_t *group_sizes, TYPE *scratchpad, \ - std::int64_t scratchpad_size, const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::uplo* 
uplo, std::int64_t* n, TYPE** a, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ return potrf_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, \ group_count, group_sizes, scratchpad, scratchpad_size, dependencies); \ } @@ -554,44 +554,44 @@ POTRF_BATCH_LAUNCHER_USM(std::complex, rocsolver_zpotrf_batched) #undef POTRF_BATCH_LAUNCHER_USM -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t stride_a, - float *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, std::int64_t stride_a, - double *b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, 
std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } -sycl::event potrs_batch(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "potrs_batch"); } template -inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, T **a, - std::int64_t *lda, T **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, T *scratchpad, +inline sycl::event potrs_batch(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, T** a, + std::int64_t* lda, T** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; int64_t batch_size = 0; @@ 
-605,31 +605,32 @@ inline sycl::event potrs_batch(const char *func_name, Func func, sycl::queue &qu "rocsolver potrs_batch only supports nrhs = 1"); } - T **a_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); - T **b_dev = (T **)malloc_device(sizeof(T *) * batch_size, queue); + T** a_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); + T** b_dev = (T**)malloc_device(sizeof(T*) * batch_size, queue); auto done_cpy_a = - queue.submit([&](sycl::handler &h) { h.memcpy(a_dev, a, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(a_dev, a, batch_size * sizeof(T*)); }); auto done_cpy_b = - queue.submit([&](sycl::handler &h) { h.memcpy(b_dev, b, batch_size * sizeof(T *)); }); + queue.submit([&](sycl::handler& h) { h.memcpy(b_dev, b, batch_size * sizeof(T*)); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_cpy_a); cgh.depends_on(done_cpy_b); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); int64_t offset = 0; rocblas_status err; for (int64_t i = 0; i < group_count; i++) { - auto **a_ = reinterpret_cast(a_dev); - auto **b_ = reinterpret_cast(b_dev); - rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo[i]), - (int)n[i], (int)nrhs[i], a_ + offset, (int)lda[i], - b_ + offset, (int)ldb[i], (int)group_sizes[i]); + auto** a_ = reinterpret_cast(a_dev); + auto** b_ = reinterpret_cast(b_dev); + rocsolver_native_named_func(func_name, func, err, handle, + get_rocblas_fill_mode(uplo[i]), (int)n[i], (int)nrhs[i], + a_ + offset, (int)lda[i], b_ + offset, (int)ldb[i], + (int)group_sizes[i]); offset += group_sizes[i]; } }); @@ -640,10 +641,10 @@ inline sycl::event 
potrs_batch(const char *func_name, Func func, sycl::queue &qu // Scratchpad memory not needed as parts of buffer a is used as workspace memory #define POTRS_BATCH_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ sycl::event potrs_batch( \ - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, \ - TYPE **a, std::int64_t *lda, TYPE **b, std::int64_t *ldb, std::int64_t group_count, \ - std::int64_t *group_sizes, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs_batch(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, \ ldb, group_count, group_sizes, scratchpad, scratchpad_size, \ dependencies); \ @@ -656,52 +657,52 @@ POTRS_BATCH_LAUNCHER_USM(std::complex, rocsolver_zpotrs_batched) #undef POTRS_BATCH_LAUNCHER_USM -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::complex *tau, std::int64_t stride_tau, 
std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } -sycl::event ungqr_batch(sycl::queue &queue, std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::complex **a, std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "ungqr_batch"); } // BATCH SCRATCHPAD API template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, 
std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -709,7 +710,7 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -717,20 +718,20 @@ std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } 
template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -738,7 +739,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -746,7 +747,7 @@ std::int64_t getri_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, @@ -754,7 +755,7 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl: throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose trans, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, @@ -763,32 +764,32 @@ std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t 
stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -796,7 +797,7 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, @@ -805,19 +806,19 @@ std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queu } template <> 
-std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -825,7 +826,7 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, @@ -833,14 +834,14 @@ std::int64_t potrf_batch_scratchpad_size>(sycl::queue &queu throw unimplemented("lapack", "potrf_batch_scratchpad_size"); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> -std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, 
std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, @@ -849,25 +850,25 @@ std::int64_t potrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> std::int64_t potrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { throw unimplemented("lapack", "potrs_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -875,148 +876,148 @@ std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_ } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, 
std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> std::int64_t ungqr_batch_scratchpad_size>( - sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - 
std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrf_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getri_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getri_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, 
std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t getrs_batch_scratchpad_size(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> std::int64_t getrs_batch_scratchpad_size>( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes) { + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { throw unimplemented("lapack", "getrs_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw 
unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t geqrf_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::int64_t *lda, +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } template <> -std::int64_t orgqr_batch_scratchpad_size(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, 
std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); } // rocsolverDnXpotrfBatched does not use scratchpad memory -#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrf_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t * n, std::int64_t * lda, \ - std::int64_t group_count, std::int64_t * group_sizes) { \ - return 0; \ +#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + return 0; \ } POTRF_GROUP_LAUNCHER_SCRATCH(float) @@ -1027,13 +1028,13 @@ POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRF_GROUP_LAUNCHER_SCRATCH // rocsolverDnXpotrsBatched does not use scratchpad memory -#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ - template <> \ - std::int64_t potrs_batch_scratchpad_size( \ - sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t * n, std::int64_t * nrhs, \ - std::int64_t * lda, std::int64_t * ldb, std::int64_t group_count, \ - std::int64_t * group_sizes) { \ - return 0; \ +#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::mkl::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + return 0; \ } POTRS_GROUP_LAUNCHER_SCRATCH(float) @@ -1044,19 +1045,19 @@ POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) #undef POTRS_GROUP_LAUNCHER_SCRATCH template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + 
std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } template <> -std::int64_t ungqr_batch_scratchpad_size>(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); } diff --git a/src/lapack/backends/rocsolver/rocsolver_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_handle.hpp index c44463ef4..fff7d591c 100644 --- a/src/lapack/backends/rocsolver/rocsolver_handle.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_handle.hpp @@ -30,10 +30,10 @@ namespace rocsolver { template struct rocsolver_handle { - using handle_container_t = std::unordered_map *>; + using handle_container_t = std::unordered_map*>; handle_container_t rocsolver_handle_mapper_{}; ~rocsolver_handle() noexcept(false) { - for (auto &handle_pair : rocsolver_handle_mapper_) { + for (auto& handle_pair : rocsolver_handle_mapper_) { rocblas_status err; if (handle_pair.second != nullptr) { auto handle = handle_pair.second->exchange(nullptr); diff --git a/src/lapack/backends/rocsolver/rocsolver_helper.hpp b/src/lapack/backends/rocsolver/rocsolver_helper.hpp index 34064fb09..694e4e08b 100644 --- a/src/lapack/backends/rocsolver/rocsolver_helper.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_helper.hpp @@ -81,7 +81,7 @@ void overflow_check(Index index, Next... 
indices) { class rocsolver_error : virtual public std::runtime_error { protected: - inline const char *rocsolver_error_map(rocblas_status error) { + inline const char* rocsolver_error_map(rocblas_status error) { return rocblas_status_to_string(error); } @@ -111,7 +111,7 @@ class rocsolver_error : virtual public std::runtime_error { class hip_error : virtual public std::runtime_error { protected: - inline const char *hip_error_map(hipError_t result) { + inline const char* hip_error_map(hipError_t result) { return hipGetErrorName(result); } int error_number; ///< error number @@ -167,9 +167,8 @@ class hip_error : virtual public std::runtime_error { HIP_ERROR_FUNC(hipStreamSynchronize, hip_err, currentStreamId); template -inline void rocsolver_native_named_func(const char *func_name, Func func, - rocsolver_status err, - rocsolver_handle handle, Types... args){ +inline void rocsolver_native_named_func(const char* func_name, Func func, rocsolver_status err, + rocsolver_handle handle, Types... args) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND ROCSOLVER_ERROR_FUNC_T(func_name, func, err, handle, args...) 
#else @@ -258,12 +257,12 @@ struct RocmEquivalentType> { /* devinfo */ -inline int get_rocsolver_devinfo(sycl::queue &queue, sycl::buffer &devInfo) { +inline int get_rocsolver_devinfo(sycl::queue& queue, sycl::buffer& devInfo) { sycl::host_accessor dev_info_{ devInfo }; return dev_info_[0]; } -inline int get_rocsolver_devinfo(sycl::queue &queue, const int *devInfo) { +inline int get_rocsolver_devinfo(sycl::queue& queue, const int* devInfo) { int dev_info_; queue.memcpy(&dev_info_, devInfo, sizeof(int)); queue.wait(); @@ -271,8 +270,8 @@ inline int get_rocsolver_devinfo(sycl::queue &queue, const int *devInfo) { } template -inline void lapack_info_check(sycl::queue &queue, DEVINFO_T devinfo, const char *func_name, - const char *cufunc_name) { +inline void lapack_info_check(sycl::queue& queue, DEVINFO_T devinfo, const char* func_name, + const char* cufunc_name) { queue.wait(); const int devinfo_ = get_rocsolver_devinfo(queue, devinfo); if (devinfo_ > 0) diff --git a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp index d3e1b9e26..99a02c22a 100644 --- a/src/lapack/backends/rocsolver/rocsolver_lapack.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_lapack.cpp @@ -32,27 +32,27 @@ namespace rocsolver { // BUFFER APIs template -inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, 
scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tauq_acc = tauq.template get_access(cgh); auto taup_acc = taup.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tauq_ = sc.get_mem(tauq_acc); - auto taup_ = sc.get_mem(taup_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tauq_ = sc.get_mem(tauq_acc); + auto taup_ = sc.get_mem(taup_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_); @@ -61,10 +61,10 @@ inline void gebrd(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEBRD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tauq, sycl::buffer &taup, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tauq, sycl::buffer& taup, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ gebrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size); \ } @@ -76,41 +76,41 @@ GEBRD_LAUNCHER(std::complex, double, rocsolver_zgebrd) #undef GEBRD_LAUNCHER -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void 
gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } -void gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "gerqf"); } template -inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { 
auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_); }); @@ -118,8 +118,8 @@ inline void geqrf(const char *func_name, Func func, sycl::queue &queue, std::int } #define GEQRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ geqrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -133,9 +133,9 @@ GEQRF_LAUNCHER(std::complex, rocsolver_zgeqrf) #undef GEQRF_LAUNCHER template -void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); @@ -146,15 +146,15 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); sycl::buffer devInfo{ 1 }; - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = 
queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, ipiv32_, devInfo_); @@ -162,7 +162,7 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); @@ -174,8 +174,8 @@ void getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, } #define GETRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ getrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -188,32 +188,32 @@ GETRF_LAUNCHER(std::complex, rocsolver_zgetrf) #undef GETRF_LAUNCHER -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + 
std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } -void getri(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "getri"); } template -inline void getrs(const char *func_name, Func func, sycl::queue &queue, +inline void getrs(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb); @@ -224,7 +224,7 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, std::uint64_t ipiv_size = ipiv.size(); sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - queue.submit([&](sycl::handler &cgh) { + 
queue.submit([&](sycl::handler& cgh) { auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { @@ -232,15 +232,15 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, }); }); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv_acc = ipiv32.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv_ = sc.get_mem(ipiv_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv_ = sc.get_mem(ipiv_acc); + auto b_ = sc.get_mem(b_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb); @@ -249,10 +249,10 @@ inline void getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ getrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -265,30 +265,30 @@ GETRS_LAUNCHER(std::complex, rocsolver_zgetrs) #undef GETRS_LAUNCHER template -inline void gesvd(const char *func_name, 
Func func, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +inline void gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, m, lda, ldu, ldvt, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto s_acc = s.template get_access(cgh); auto u_acc = u.template get_access(cgh); auto vt_acc = vt.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto s_ = sc.get_mem(s_acc); - auto u_ = sc.get_mem(u_acc); - auto vt_ = sc.get_mem(vt_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto s_ = sc.get_mem(s_acc); + auto u_ = sc.get_mem(u_acc); + auto vt_ = sc.get_mem(vt_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_jobsvd(jobu), get_rocsolver_jobsvd(jobvt), m, n, a_, lda, s_, u_, ldu, @@ -300,10 +300,10 @@ inline void 
gesvd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define GESVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, \ - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, \ + void gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, \ + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ gesvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, \ vt, ldvt, scratchpad, scratchpad_size); \ @@ -317,25 +317,25 @@ GESVD_LAUNCHER(std::complex, double, rocsolver_zgesvd) #undef GESVD_LAUNCHER template -inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +inline void heevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + 
onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -346,9 +346,9 @@ inline void heevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ scratchpad_size); \ } @@ -359,28 +359,28 @@ HEEVD_LAUNCHER(std::complex, double, rocsolver_zheevd) #undef HEEVD_LAUNCHER template -inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, +inline void hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename 
RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -391,10 +391,10 @@ inline void hegvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \ w, scratchpad, scratchpad_size); \ } @@ -405,24 
+405,24 @@ HEGVD_LAUNCHER(std::complex, double, rocsolver_zhegvd) #undef HEGVD_LAUNCHER template -inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void hetrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -431,10 +431,10 @@ inline void hetrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define HETRD_LAUNCHER(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - void hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, \ - sycl::buffer &e, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void 
hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, \ + sycl::buffer& e, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ hetrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size); \ } @@ -444,32 +444,32 @@ HETRD_LAUNCHER(std::complex, double, rocsolver_zhetrd) #undef HETRD_LAUNCHER -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } -void hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "hetrf"); } template -inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto 
tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -478,9 +478,9 @@ inline void orgbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -491,18 +491,18 @@ ORGBR_LAUNCHER(double, rocsolver_dorgbr) #undef ORGBR_LAUNCHER template -inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = 
a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -510,9 +510,9 @@ inline void orgqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define ORGQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ orgqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -523,18 +523,18 @@ ORGQR_LAUNCHER(double, rocsolver_dorgqr) #undef ORGQR_LAUNCHER template -inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void orgtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - 
onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -543,8 +543,8 @@ inline void orgtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -556,22 +556,22 @@ ORGTR_LAUNCHER(double, rocsolver_dorgtr) #undef ORGTR_LAUNCHER template -inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template 
get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -581,10 +581,10 @@ inline void ormtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + void ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ormtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, \ c, ldc, scratchpad, scratchpad_size); \ @@ -595,35 +595,35 @@ ORMTR_LAUNCHER(double, rocsolver_dormtr) #undef ORMTR_LAUNCHER -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, 
std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } -void ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "ormrq"); } template -inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); 
+ auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -633,10 +633,10 @@ inline void ormqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define ORMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -647,19 +647,19 @@ ORMQR_LAUNCHER(double, rocsolver_dormqr) #undef ORMQR_LAUNCHER template -inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -669,8 +669,8 @@ inline void potrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size); \ } @@ -683,19 +683,19 @@ POTRF_LAUNCHER(std::complex, rocsolver_zpotrf) #undef POTRF_LAUNCHER template -inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +inline void potri(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto devInfo_ = 
sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -705,8 +705,8 @@ inline void potri(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRI_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size); \ } @@ -719,19 +719,19 @@ POTRI_LAUNCHER(std::complex, rocsolver_zpotri) #undef POTRI_LAUNCHER template -inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, +inline void potrs(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); + auto a_ = sc.get_mem(a_acc); + auto 
b_ = sc.get_mem(b_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb); @@ -740,9 +740,9 @@ inline void potrs(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define POTRS_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ potrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size); \ } @@ -755,23 +755,23 @@ POTRS_LAUNCHER(std::complex, rocsolver_zpotrs) #undef POTRS_LAUNCHER template -inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void syevd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + 
onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -782,9 +782,9 @@ inline void syevd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYEVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ syevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, scratchpad, \ scratchpad_size); \ } @@ -795,26 +795,26 @@ SYEVD_LAUNCHER(double, rocsolver_dsyevd) #undef SYEVD_LAUNCHER template -inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = 
typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); sycl::buffer devInfo{ 1 }; - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto b_acc = b.template get_access(cgh); auto w_acc = w.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); auto scratch_acc = scratchpad.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto b_ = sc.get_mem(b_acc); - auto w_ = sc.get_mem(w_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); - auto scratch_ = sc.get_mem(scratch_acc); + auto a_ = sc.get_mem(a_acc); + auto b_ = sc.get_mem(b_acc); + auto w_ = sc.get_mem(w_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); + auto scratch_ = sc.get_mem(scratch_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -825,10 +825,10 @@ inline void sygvd(const char *func_name, Func func, sycl::queue &queue, std::int } #define SYGVD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, \ - sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, \ w, scratchpad, scratchpad_size); \ } @@ -839,23 +839,23 
@@ SYGVD_LAUNCHER(double, rocsolver_dsygvd) #undef SYGVD_LAUNCH template -inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void sytrd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto d_acc = d.template get_access(cgh); auto e_acc = e.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto d_ = sc.get_mem(d_acc); - auto e_ = sc.get_mem(e_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto d_ = sc.get_mem(d_acc); + auto e_ = sc.get_mem(e_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -864,9 +864,9 @@ inline void sytrd(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRD_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, \ - sycl::buffer &tau, sycl::buffer &scratchpad, \ + void sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, 
\ + sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size); \ @@ -878,9 +878,9 @@ SYTRD_LAUNCHER(double, rocsolver_dsytrd) #undef SYTRD_LAUNCHER template -inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &scratchpad, +inline void sytrf(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); @@ -892,15 +892,15 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: std::uint64_t ipiv_size = n; sycl::buffer ipiv32(sycl::range<1>{ ipiv_size }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto ipiv32_acc = ipiv32.template get_access(cgh); auto devInfo_acc = devInfo.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto ipiv32_ = sc.get_mem(ipiv32_acc); - auto devInfo_ = sc.get_mem(devInfo_acc); + auto a_ = sc.get_mem(a_acc); + auto ipiv32_ = sc.get_mem(ipiv32_acc); + auto devInfo_ = sc.get_mem(devInfo_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, ipiv32_, devInfo_); @@ -908,7 +908,7 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: }); // Copy from 32-bit buffer to 64-bit - queue.submit([&](sycl::handler &cgh) 
{ + queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); auto ipiv32_acc = ipiv32.template get_access(cgh); auto ipiv_acc = ipiv.template get_access(cgh); @@ -920,8 +920,8 @@ inline void sytrf(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define SYTRF_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, \ + void sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, \ scratchpad_size); \ @@ -934,47 +934,47 @@ SYTRF_LAUNCHER(std::complex, rocsolver_zsytrf) #undef SYTRF_LAUNCHER -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw 
unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } -void trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +void trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "trtrs"); } template -inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +inline void ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -983,9 +983,9 @@ inline void ungbr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGBR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -996,18 +996,18 @@ UNGBR_LAUNCHER(std::complex, rocsolver_zungbr) #undef UNGBR_LAUNCHER template -inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - 
onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -1015,9 +1015,9 @@ inline void ungqr(const char *func_name, Func func, sycl::queue &queue, std::int } #define UNGQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ ungqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, \ scratchpad_size); \ } @@ -1028,18 +1028,18 @@ UNGQR_LAUNCHER(std::complex, rocsolver_zungqr) #undef UNGQR_LAUNCHER template -inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +inline void ungtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -1048,8 +1048,8 @@ inline void ungtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNGTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, \ + void ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, \ scratchpad_size); \ @@ -1060,37 +1060,37 @@ UNGTR_LAUNCHER(std::complex, rocsolver_zungtr) #undef UNGTR_LAUNCHER -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } -void unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, +void 
unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { throw unimplemented("lapack", "unmrq"); } template -inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -1100,10 +1100,10 @@ inline void unmqr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMQR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void unmqr(sycl::queue &queue, oneapi::mkl::side side, 
oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, \ - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, std::int64_t ldc, \ - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { \ + void unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, \ ldc, scratchpad, scratchpad_size); \ } @@ -1114,22 +1114,22 @@ UNMQR_LAUNCHER(std::complex, rocsolver_zunmqr) #undef UNMQR_LAUNCHER template -inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi::mkl::side side, +inline void unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - queue.submit([&](sycl::handler &cgh) { + queue.submit([&](sycl::handler& cgh) { auto a_acc = a.template get_access(cgh); auto tau_acc = tau.template get_access(cgh); auto c_acc = c.template get_access(cgh); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = sc.get_mem(a_acc); - auto tau_ = sc.get_mem(tau_acc); - auto c_ = sc.get_mem(c_acc); + auto a_ = sc.get_mem(a_acc); + auto tau_ = 
sc.get_mem(tau_acc); + auto c_ = sc.get_mem(c_acc); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -1139,10 +1139,10 @@ inline void unmtr(const char *func_name, Func func, sycl::queue &queue, oneapi:: } #define UNMTR_LAUNCHER(TYPE, ROCSOLVER_ROUTINE) \ - void unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + void unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, \ - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, \ - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ std::int64_t scratchpad_size) { \ unmtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, \ c, ldc, scratchpad, scratchpad_size); \ @@ -1156,26 +1156,26 @@ UNMTR_LAUNCHER(std::complex, rocsolver_zunmtr) // USM APIs template -inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *d, T_B *e, T_A *tauq, - T_A *taup, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event gebrd(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* d, T_B* e, T_A* tauq, + T_A* taup, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { 
cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tauq_ = reinterpret_cast(tauq); - auto taup_ = reinterpret_cast(taup); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tauq_ = reinterpret_cast(tauq); + auto taup_ = reinterpret_cast(taup); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, d_, e_, tauq_, taup_); @@ -1185,10 +1185,10 @@ inline sycl::event gebrd(const char *func_name, Func func, sycl::queue &queue, s } #define GEBRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event gebrd(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tauq, TYPE_A *taup, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tauq, TYPE_A* taup, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gebrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1200,43 +1200,43 @@ GEBRD_LAUNCHER_USM(std::complex, double, rocsolver_zgebrd) #undef GEBRD_LAUNCHER_USM -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } -sycl::event gerqf(sycl::queue &queue, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "gerqf"); } template -inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, T *tau, T *scratchpad, +inline sycl::event geqrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, 
scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, tau_); }); @@ -1245,9 +1245,9 @@ inline sycl::event geqrf(const char *func_name, Func func, sycl::queue &queue, s } #define GEQRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event geqrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return geqrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, tau, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1260,10 +1260,10 @@ GEQRF_LAUNCHER_USM(std::complex, rocsolver_zgeqrf) #undef GEQRF_LAUNCHER_USM template -inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, T *a, std::int64_t lda, std::int64_t *ipiv, T *scratchpad, +inline sycl::event getrf(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; 
overflow_check(m, n, lda, scratchpad_size); @@ -1271,19 +1271,19 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s // To get around the limitation. // Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = std::min(n, m); - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); - auto ipiv_ = reinterpret_cast(ipiv32); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); + auto ipiv_ = reinterpret_cast(ipiv32); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, a_, lda, ipiv_, devInfo_); @@ -1291,7 +1291,7 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -1305,10 +1305,10 @@ inline sycl::event getrf(const char *func_name, Func func, sycl::queue &queue, s } #define GETRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event getrf(sycl::queue &queue, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, 
TYPE *scratchpad, \ + sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return getrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1320,33 +1320,33 @@ GETRF_LAUNCHER_USM(std::complex, rocsolver_zgetrf) #undef GETRF_LAUNCHER_USM -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } -sycl::event getri(sycl::queue &queue, std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector 
&dependencies) { +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "getri"); } template -inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, std::int64_t *ipiv, T *b, std::int64_t ldb, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event getrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); @@ -1354,25 +1354,25 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, // To get around the limitation. // Create new buffer and convert 64-bit values. 
std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv32[index] = static_cast(ipiv[index]); }); }); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } cgh.depends_on(done_casting); - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto b_ = reinterpret_cast(b); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb); @@ -1387,10 +1387,10 @@ inline sycl::event getrs(const char *func_name, Func func, sycl::queue &queue, } #define GETRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event getrs(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, std::int64_t *ipiv, TYPE *b, \ - std::int64_t ldb, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event getrs(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \ + std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return getrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, 
trans, n, nrhs, a, lda, ipiv, \ b, ldb, scratchpad, scratchpad_size, dependencies); \ } @@ -1403,28 +1403,28 @@ GETRS_LAUNCHER_USM(std::complex, rocsolver_zgetrs) #undef GETRS_LAUNCHER_USM template -inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event gesvd(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, - std::int64_t n, T_A *a, std::int64_t lda, T_B *s, T_A *u, std::int64_t ldu, - T_A *vt, std::int64_t ldvt, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu, + T_A* vt, std::int64_t ldvt, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldu, ldvt, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto s_ = reinterpret_cast(s); - auto u_ = reinterpret_cast(u); - auto vt_ = reinterpret_cast(vt); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto s_ = reinterpret_cast(s); + auto u_ = reinterpret_cast(u); + auto vt_ = reinterpret_cast(vt); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; 
rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_jobsvd(jobu), get_rocsolver_jobsvd(jobvt), m, n, a_, lda, s_, u_, ldu, @@ -1438,11 +1438,11 @@ inline sycl::event gesvd(const char *func_name, Func func, sycl::queue &queue, } #define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event gesvd(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ - std::int64_t m, std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *s, \ - TYPE_A *u, std::int64_t ldu, TYPE_A *vt, std::int64_t ldvt, \ - TYPE_A *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event gesvd(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s, \ + TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return gesvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, \ u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); \ } @@ -1455,25 +1455,25 @@ GESVD_LAUNCHER_USM(std::complex, double, rocsolver_zgesvd) #undef GESVD_LAUNCHER_USM template -inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A *&a, - std::int64_t lda, T_B *&w, T_A *&scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event heevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - 
auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1486,10 +1486,10 @@ inline sycl::event heevd(const char *func_name, Func func, sycl::queue &queue, } #define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event heevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ - std::int64_t n, TYPE_A *a, std::int64_t lda, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event heevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ + std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return heevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1500,27 +1500,27 @@ HEEVD_LAUNCHER_USM(std::complex, double, rocsolver_zheevd) #undef HEEVD_LAUNCHER_USM template -inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A 
*&a, - std::int64_t lda, T_A *&b, std::int64_t ldb, T_B *&w, T_A *&scratchpad, +inline sycl::event hegvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, T_A*& scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -1533,11 +1533,11 @@ inline sycl::event hegvd(const char *func_name, Func func, sycl::queue &queue, s } #define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event hegvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A *a, 
std::int64_t lda, \ - TYPE_A *b, std::int64_t ldb, TYPE_B *w, TYPE_A *scratchpad, \ + sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda, \ + TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hegvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, \ b, ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -1548,24 +1548,24 @@ HEGVD_LAUNCHER_USM(std::complex, double, rocsolver_zhegvd) #undef HEGVD_LAUNCHER_USM template -inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T_A *a, std::int64_t lda, T_B *d, - T_B *e, T_A *tau, T_A *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event hetrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d, + T_B* e, T_A* tau, T_A* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType_A = typename RocmEquivalentType::Type; using rocmDataType_B = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ 
= reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -1575,10 +1575,10 @@ inline sycl::event hetrd(const char *func_name, Func func, sycl::queue &queue, } #define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROCSOLVER_ROUTINE) \ - sycl::event hetrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A *a, \ - std::int64_t lda, TYPE_B *d, TYPE_B *e, TYPE_A *tau, TYPE_A *scratchpad, \ + sycl::event hetrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return hetrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1588,36 +1588,36 @@ HETRD_LAUNCHER_USM(std::complex, double, rocsolver_zhetrd) #undef HETRD_LAUNCHER_USM -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } -sycl::event hetrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies) { throw unimplemented("lapack", "hetrf"); } template -inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event orgbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -1627,10 +1627,10 @@ inline sycl::event orgbr(const char *func_name, Func func, sycl::queue &queue, } #define ORGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return 
orgbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1641,21 +1641,21 @@ ORGBR_LAUNCHER_USM(double, rocsolver_dorgbr) #undef ORGBR_LAUNCHER_USM template -inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -1664,9 +1664,9 @@ inline sycl::event orgqr(const char *func_name, Func func, sycl::queue &queue, s } #define ORGQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, 
TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1677,21 +1677,21 @@ ORGQR_LAUNCHER_USM(double, rocsolver_dorgqr) #undef ORGQR_LAUNCHER_USM template -inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event orgtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -1701,9 +1701,9 @@ inline sycl::event orgtr(const char *func_name, Func func, sycl::queue &queue, } #define ORGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event orgtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event 
orgtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return orgtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1714,24 +1714,24 @@ ORGTR_LAUNCHER_USM(double, rocsolver_dorgtr) #undef ORGTR_LAUNCHER_USM template -inline sycl::event ormtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -1742,11 +1742,11 @@ inline sycl::event ormtr(const char *func_name, Func 
func, sycl::queue &queue, } #define ORMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ormtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, \ lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -1756,37 +1756,37 @@ ORMTR_LAUNCHER_USM(double, rocsolver_dormtr) #undef ORMTR_LAUNCHER_USM -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } -sycl::event ormrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ormrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, 
std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "ormrq"); } template -inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ormqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -1797,11 +1797,11 @@ inline sycl::event ormqr(const char *func_name, Func func, sycl::queue &queue, } #define ORMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ormqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, 
std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event ormqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return ormqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -1812,22 +1812,22 @@ ORMQR_LAUNCHER_USM(double, rocsolver_dormqr) #undef ORMQR_LAUNCHER_USM template -inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; 
rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -1839,9 +1839,9 @@ inline sycl::event potrf(const char *func_name, Func func, sycl::queue &queue, } #define POTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1854,23 +1854,23 @@ POTRF_LAUNCHER_USM(std::complex, rocsolver_zpotrf) #undef POTRF_LAUNCHER_USM template -inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event potri(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = 
sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, devInfo_); @@ -1882,9 +1882,9 @@ inline sycl::event potri(const char *func_name, Func func, sycl::queue &queue, } #define POTRI_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potri(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potri(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potri(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, scratchpad, \ scratchpad_size, dependencies); \ } @@ -1897,22 +1897,22 @@ POTRI_LAUNCHER_USM(std::complex, rocsolver_zpotri) #undef POTRI_LAUNCHER_USM template -inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *scratchpad, +inline sycl::event potrs(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, nrhs, lda, ldb, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < 
num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb); @@ -1922,10 +1922,10 @@ inline sycl::event potrs(const char *func_name, Func func, sycl::queue &queue, } #define POTRS_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event potrs(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, \ - std::int64_t nrhs, TYPE *a, std::int64_t lda, TYPE *b, std::int64_t ldb, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event potrs(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return potrs(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1938,24 +1938,24 @@ POTRS_LAUNCHER_USM(std::complex, rocsolver_zpotrs) #undef POTRS_LAUNCHER_USM template -inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *w, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event syevd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* w, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename 
RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto w_ = reinterpret_cast(w); - auto scratch_ = reinterpret_cast(scratchpad); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + auto scratch_ = reinterpret_cast(scratchpad); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, lda, w_, scratch_, @@ -1968,10 +1968,10 @@ inline sycl::event syevd(const char *func_name, Func func, sycl::queue &queue, } #define SYEVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event syevd(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ - std::int64_t n, TYPE *a, std::int64_t lda, TYPE *w, TYPE *scratchpad, \ + sycl::event syevd(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, \ + std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return syevd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, jobz, uplo, n, a, lda, w, \ scratchpad, scratchpad_size, dependencies); \ } @@ -1982,26 +1982,26 @@ SYEVD_LAUNCHER_USM(double, rocsolver_dsyevd) #undef SYEVD_LAUNCHER_USM template -inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, 
std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T *a, - std::int64_t lda, T *b, std::int64_t ldb, T *w, T *scratchpad, +inline sycl::event sygvd(const char* func_name, Func func, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* w, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, ldb, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); - auto done = queue.submit([&](sycl::handler &cgh) { + int* devInfo = (int*)malloc_device(sizeof(int), queue); + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto b_ = reinterpret_cast(b); - auto w_ = reinterpret_cast(w); - auto devInfo_ = reinterpret_cast(devInfo); - auto scratch_ = reinterpret_cast(scratchpad); + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + auto devInfo_ = reinterpret_cast(devInfo); + auto scratch_ = reinterpret_cast(scratchpad); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocsolver_itype(itype), get_rocsolver_job(jobz), get_rocblas_fill_mode(uplo), n, a_, @@ -2014,10 +2014,10 @@ inline sycl::event sygvd(const char *func_name, Func func, sycl::queue &queue, s } #define SYGVD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sygvd(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, \ - oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, 
std::int64_t lda, TYPE *b, \ - std::int64_t ldb, TYPE *w, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, \ + oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \ + std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return sygvd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, itype, jobz, uplo, n, a, lda, \ b, ldb, w, scratchpad, scratchpad_size, dependencies); \ } @@ -2028,23 +2028,23 @@ SYGVD_LAUNCHER_USM(double, rocsolver_dsygvd) #undef SYGVD_LAUNCHER_USM template -inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *d, T *e, - T *tau, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrd(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, T* e, + T* tau, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto d_ = reinterpret_cast(d); - auto e_ = reinterpret_cast(e); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); rocblas_status err; 
rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, d_, e_, tau_); @@ -2054,10 +2054,10 @@ inline sycl::event sytrd(const char *func_name, Func func, sycl::queue &queue, } #define SYTRD_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sytrd(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *d, TYPE *e, TYPE *tau, TYPE *scratchpad, \ + sycl::event sytrd(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return sytrd(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, d, e, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2068,30 +2068,30 @@ SYTRD_LAUNCHER_USM(double, rocsolver_dsytrd) #undef SYTRD_LAUNCHER_USM template -inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, - std::int64_t *ipiv, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event sytrf(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t* ipiv, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - int *devInfo = (int *)malloc_device(sizeof(int), queue); + int* devInfo = (int*)malloc_device(sizeof(int), queue); // rocsolver legacy api does not accept 64-bit ints. // To get around the limitation. 
// Allocate memory with 32-bit ints then copy over results std::uint64_t ipiv_size = n; - int *ipiv32 = (int *)malloc_device(sizeof(int) * ipiv_size, queue); + int* ipiv32 = (int*)malloc_device(sizeof(int) * ipiv_size, queue); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto ipiv_ = reinterpret_cast(ipiv32); - auto devInfo_ = reinterpret_cast(devInfo); + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv32); + auto devInfo_ = reinterpret_cast(devInfo); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, ipiv_, devInfo_); @@ -2099,7 +2099,7 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, }); // Copy from 32-bit USM to 64-bit - auto done_casting = queue.submit([&](sycl::handler &cgh) { + auto done_casting = queue.submit([&](sycl::handler& cgh) { cgh.depends_on(done); cgh.parallel_for(sycl::range<1>{ ipiv_size }, [=](sycl::id<1> index) { ipiv[index] = static_cast(ipiv32[index]); @@ -2113,10 +2113,10 @@ inline sycl::event sytrf(const char *func_name, Func func, sycl::queue &queue, } #define SYTRF_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event sytrf(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, std::int64_t *ipiv, TYPE *scratchpad, \ + sycl::event sytrf(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ 
return sytrf(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, ipiv, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2128,50 +2128,50 @@ SYTRF_LAUNCHER_USM(std::complex, rocsolver_zsytrf) #undef SYTRF_LAUNCHER_USM -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + oneapi::mkl::diag diag, std::int64_t n, 
std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } -sycl::event trtrs(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, +sycl::event trtrs(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "trtrs"); } template -inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event ungbr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, - T *a, std::int64_t lda, T *tau, T *scratchpad, + T* a, std::int64_t lda, T* tau, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, 
err, handle, get_rocblas_generate(vec), m, n, k, a_, lda, tau_); @@ -2181,10 +2181,10 @@ inline sycl::event ungbr(const char *func_name, Func func, sycl::queue &queue, } #define UNGBR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungbr(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, \ - std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, TYPE *tau, \ - TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungbr(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungbr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, vec, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2195,21 +2195,21 @@ UNGBR_LAUNCHER_USM(std::complex, rocsolver_zungbr) #undef UNGBR_LAUNCHER_USM template -inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungqr(const char* func_name, Func func, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, k, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto 
a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, m, n, k, a_, lda, tau_); }); @@ -2218,9 +2218,9 @@ inline sycl::event ungqr(const char *func_name, Func func, sycl::queue &queue, s } #define UNGQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungqr(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, m, n, k, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2231,21 +2231,21 @@ UNGQR_LAUNCHER_USM(std::complex, rocsolver_zungqr) #undef UNGQR_LAUNCHER_USM template -inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, - oneapi::mkl::uplo uplo, std::int64_t n, T *a, std::int64_t lda, T *tau, - T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +inline sycl::event ungtr(const char* func_name, Func func, sycl::queue& queue, + oneapi::mkl::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, 
[=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_fill_mode(uplo), n, a_, lda, tau_); @@ -2255,9 +2255,9 @@ inline sycl::event ungtr(const char *func_name, Func func, sycl::queue &queue, } #define UNGTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event ungtr(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *scratchpad, std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + sycl::event ungtr(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ return ungtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, uplo, n, a, lda, tau, \ scratchpad, scratchpad_size, dependencies); \ } @@ -2267,39 +2267,39 @@ UNGTR_LAUNCHER_USM(std::complex, rocsolver_zungtr) #undef UNGTR_LAUNCHER_USM -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } -sycl::event unmrq(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, 
std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event unmrq(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { throw unimplemented("lapack", "unmrq"); } template -inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmqr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::int64_t k, T *a, std::int64_t lda, T *tau, T *c, - std::int64_t ldc, T *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(n, lda, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), 
get_rocblas_operation(trans), m, n, k, a_, lda, tau_, c_, @@ -2310,11 +2310,11 @@ inline sycl::event unmqr(const char *func_name, Func func, sycl::queue &queue, } #define UNMQR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event unmqr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ - std::int64_t m, std::int64_t n, std::int64_t k, TYPE *a, std::int64_t lda, \ - TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmqr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return unmqr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, trans, m, n, k, a, lda, \ tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2325,24 +2325,24 @@ UNMQR_LAUNCHER_USM(std::complex, rocsolver_zunmqr) #undef UNMQR_LAUNCHER_USM template -inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, +inline sycl::event unmtr(const char* func_name, Func func, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T *a, - std::int64_t lda, T *tau, T *c, std::int64_t ldc, T *scratchpad, + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { using rocmDataType = typename RocmEquivalentType::Type; overflow_check(m, n, lda, ldc, scratchpad_size); - auto done = queue.submit([&](sycl::handler &cgh) { + auto done = queue.submit([&](sycl::handler& cgh) { int64_t num_events = dependencies.size(); for (int64_t i = 0; i < num_events; i++) { cgh.depends_on(dependencies[i]); } - 
onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler &sc) { + onemkl_rocsolver_host_task(cgh, queue, [=](RocsolverScopedContextHandler& sc) { auto handle = sc.get_handle(queue); - auto a_ = reinterpret_cast(a); - auto tau_ = reinterpret_cast(tau); - auto c_ = reinterpret_cast(c); + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); rocblas_status err; rocsolver_native_named_func(func_name, func, err, handle, get_rocblas_side_mode(side), get_rocblas_fill_mode(uplo), get_rocblas_operation(trans), @@ -2353,11 +2353,11 @@ inline sycl::event unmtr(const char *func_name, Func func, sycl::queue &queue, } #define UNMTR_LAUNCHER_USM(TYPE, ROCSOLVER_ROUTINE) \ - sycl::event unmtr(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ - oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE *a, \ - std::int64_t lda, TYPE *tau, TYPE *c, std::int64_t ldc, TYPE *scratchpad, \ + sycl::event unmtr(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, \ + oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ std::int64_t scratchpad_size, \ - const std::vector &dependencies) { \ + const std::vector& dependencies) { \ return unmtr(#ROCSOLVER_ROUTINE, ROCSOLVER_ROUTINE, queue, side, uplo, trans, m, n, a, \ lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); \ } @@ -2384,22 +2384,22 @@ GEBRD_LAUNCHER_SCRATCH(std::complex) #undef GEBRD_LAUNCHER_SCRATCH template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size(sycl::queue &queue, std::int64_t m, std::int64_t n, +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, 
std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } template <> -std::int64_t gerqf_scratchpad_size>(sycl::queue &queue, std::int64_t m, +std::int64_t gerqf_scratchpad_size>(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "gerqf_scratchpad_size"); } @@ -2448,20 +2448,20 @@ GETRF_LAUNCHER_SCRATCH(std::complex) #undef GETRF_LAUNCHER_SCRATCH template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size(sycl::queue &queue, std::int64_t n, std::int64_t lda) { +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } template <> -std::int64_t getri_scratchpad_size>(sycl::queue &queue, std::int64_t n, +std::int64_t getri_scratchpad_size>(sycl::queue& queue, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "getri_scratchpad_size"); } @@ -2520,12 +2520,12 @@ HETRD_LAUNCHER_SCRATCH(std::complex) #undef HETRD_LAUNCHER_SCRATCH template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { throw 
unimplemented("lapack", "hetrf_scratchpad_size"); } template <> -std::int64_t hetrf_scratchpad_size>(sycl::queue &queue, oneapi::mkl::uplo uplo, +std::int64_t hetrf_scratchpad_size>(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { throw unimplemented("lapack", "hetrf_scratchpad_size"); } @@ -2568,14 +2568,14 @@ ORGQR_LAUNCHER_SCRATCH(double) #undef ORGQR_LAUNCHER_SCRATCH template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { throw unimplemented("lapack", "ormrq_scratchpad_size"); } template <> -std::int64_t ormrq_scratchpad_size(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2758,7 +2758,7 @@ UNGTR_LAUNCHER_SCRATCH(std::complex) #undef UNGTR_LAUNCHER_SCRATCH template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2766,7 +2766,7 @@ std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, onea throw unimplemented("lapack", "unmrq_scratchpad_size"); } template <> -std::int64_t unmrq_scratchpad_size>(sycl::queue &queue, oneapi::mkl::side side, +std::int64_t unmrq_scratchpad_size>(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp index 2a2e7d1ab..559cf1cb6 100644 --- 
a/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp +++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.cpp @@ -46,7 +46,7 @@ thread_local rocsolver_handle RocsolverScopedContextHandler::handle_ #endif RocsolverScopedContextHandler::RocsolverScopedContextHandler(sycl::queue queue, - sycl::interop_handle &ih) + sycl::interop_handle& ih) : ih(ih), needToRecover_(false) { placedContext_ = new sycl::context(queue.get_context()); @@ -76,8 +76,8 @@ RocsolverScopedContextHandler::~RocsolverScopedContextHandler() noexcept(false) delete placedContext_; } -void ContextCallback(void *userData) { - auto *ptr = static_cast *>(userData); +void ContextCallback(void* userData) { + auto* ptr = static_cast*>(userData); if (!ptr) { return; } @@ -95,7 +95,7 @@ void ContextCallback(void *userData) { } } -rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue &queue) { +rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue& queue) { auto hipDevice = ih.get_native_device(); hipError_t hipErr; hipCtx_t desired; @@ -142,10 +142,10 @@ rocblas_handle RocsolverScopedContextHandler::get_handle(const sycl::queue &queu return handle; } -hipStream_t RocsolverScopedContextHandler::get_stream(const sycl::queue &queue) { +hipStream_t RocsolverScopedContextHandler::get_stream(const sycl::queue& queue) { return sycl::get_native(queue); } -sycl::context RocsolverScopedContextHandler::get_context(const sycl::queue &queue) { +sycl::context RocsolverScopedContextHandler::get_context(const sycl::queue& queue) { return queue.get_context(); } diff --git a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp index 443a1ca2a..19b26a2df 100644 --- a/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_scope_handle.hpp @@ -52,23 +52,23 @@ namespace rocsolver { class RocsolverScopedContextHandler { hipCtx_t original_; - sycl::context 
*placedContext_; + sycl::context* placedContext_; bool needToRecover_; - sycl::interop_handle &ih; + sycl::interop_handle& ih; #ifdef ONEMKL_PI_INTERFACE_REMOVED static thread_local rocsolver_handle handle_helper; #else static thread_local rocsolver_handle handle_helper; #endif - hipStream_t get_stream(const sycl::queue &queue); - sycl::context get_context(const sycl::queue &queue); + hipStream_t get_stream(const sycl::queue& queue); + sycl::context get_context(const sycl::queue& queue); public: - RocsolverScopedContextHandler(sycl::queue queue, sycl::interop_handle &ih); + RocsolverScopedContextHandler(sycl::queue queue, sycl::interop_handle& ih); ~RocsolverScopedContextHandler() noexcept(false); - rocblas_handle get_handle(const sycl::queue &queue); + rocblas_handle get_handle(const sycl::queue& queue); // This is a work-around function for reinterpret_casting the memory. This // will be fixed when SYCL-2020 has been implemented for Pi backend. template diff --git a/src/lapack/backends/rocsolver/rocsolver_task.hpp b/src/lapack/backends/rocsolver/rocsolver_task.hpp index 4842a18e9..b3b89a8b8 100644 --- a/src/lapack/backends/rocsolver/rocsolver_task.hpp +++ b/src/lapack/backends/rocsolver/rocsolver_task.hpp @@ -51,9 +51,9 @@ namespace lapack { namespace rocsolver { template -static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { +static inline void host_task_internal(H& cgh, sycl::queue queue, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([f, queue](sycl::interop_handle ih) { #else cgh.host_task([f, queue](cl::sycl::interop_handle ih) { #endif @@ -63,7 +63,7 @@ static inline void host_task_internal(H &cgh, sycl::queue queue, F f) { } template -static inline void onemkl_rocsolver_host_task(H &cgh, sycl::queue queue, F f) { +static inline void onemkl_rocsolver_host_task(H& cgh, sycl::queue queue, F f) { 
(void)host_task_internal(cgh, queue, f); } diff --git a/src/lapack/function_table.hpp b/src/lapack/function_table.hpp index e034fe357..dee8b8d8e 100644 --- a/src/lapack/function_table.hpp +++ b/src/lapack/function_table.hpp @@ -32,1808 +32,1808 @@ typedef struct { int version; - void (*cgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgebrd_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tauq, - sycl::buffer> &taup, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*sgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*cgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zgerqf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - 
sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*zgeqrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetrf_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetri_sycl)(sycl::queue &queue, 
std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetri_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cgetrs_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgetrs_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgetrs_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*zgetrs_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dgesvd_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sgesvd_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void 
(*cgesvd_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zgesvd_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, - std::int64_t m, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cheevd_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zheevd_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chegvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, + void (*cgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgebrd_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + 
sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*sgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*cgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zgerqf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*zgeqrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, 
sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgetrf_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*zgetri_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void 
(*zgetrs_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zgesvd_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cheevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zheevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void 
(*chegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zhegvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, + void (*zhegvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chetrd_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zhetrd_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &d, sycl::buffer &e, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*chetrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zhetrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*sorgbr_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dorgbr_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, 
sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dorgqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sorgqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sorgtr_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dorgtr_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*sormtr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*chetrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zhetrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*chetrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zhetrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*sorgbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dorgbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dorgqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sorgqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sorgtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dorgtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*sormtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dormtr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + void (*dormtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - 
sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*sormrq_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dormrq_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dormqr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*sormqr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &c, - std::int64_t ldc, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*spotrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, 
- sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*spotri_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotri_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotri_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotri_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*spotrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dpotrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &b, std::int64_t ldb, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*cpotrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zpotrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*dsyevd_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*ssyevd_sycl)(sycl::queue &queue, 
oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dsygvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*ssygvd_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &w, sycl::buffer &scratchpad, - std::int64_t scratchpad_size); - void (*dsytrd_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*ssytrd_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*ssytrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dsytrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*csytrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zsytrf_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, - sycl::buffer> &scratchpad, - std::int64_t 
scratchpad_size); - void (*ctrtrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*sormrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dormrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dormqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*sormqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*spotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + 
void (*zpotrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*spotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zpotri_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*spotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*cpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zpotrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*dsyevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + 
sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*ssyevd_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dsygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*ssygvd_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + void (*dsytrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*ssytrd_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*ssytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dsytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*csytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zsytrf_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + 
sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*ctrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*dtrtrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + void (*dtrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*strtrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + void (*strtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*ztrtrs_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, + void (*ztrtrs_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungbr_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, 
sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungbr_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungqr_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cungtr_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*zungtr_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - void (*cunmrq_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungbr_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + 
sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungqr_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cungtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*zungtr_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + void (*cunmrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmrq_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + void (*zunmrq_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cunmqr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + void (*cunmqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - 
sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmqr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, + void (*zunmqr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cunmtr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + void (*cunmtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zunmtr_sycl)(sycl::queue &queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, + void (*zunmtr_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, - sycl::buffer> &c, std::int64_t ldc, - sycl::buffer> &scratchpad, - std::int64_t scratchpad_size); - sycl::event (*cgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgebrd_usm_sycl)(sycl::queue 
&queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *d, double *e, double *tauq, - double *taup, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgebrd_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + sycl::event (*cgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tauq, + double* taup, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgebrd_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, 
std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgerqf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgerqf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgeqrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies); + sycl::event (*cgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrf_usm_sycl)(sycl::queue& 
queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, double *a, std::int64_t lda, - std::int64_t *ipiv, double *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetrf_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, float *a, std::int64_t lda, - std::int64_t *ipiv, 
float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, - std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t *ipiv, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t *ipiv, float *b, std::int64_t ldb, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgesvd_usm_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + const std::vector& dependencies); + sycl::event (*zgetri_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); 
+ sycl::event (*cgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t* ipiv, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t* ipiv, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgetrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *s, double *u, - std::int64_t ldu, double *vt, std::int64_t ldvt, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgesvd_usm_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *s, float *u, std::int64_t ldu, - float 
*vt, std::int64_t ldvt, float *scratchpad, + float* a, std::int64_t lda, float* s, float* u, std::int64_t ldu, + float* vt, std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgesvd_usm_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + const std::vector& dependencies); + sycl::event (*cgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *s, - std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgesvd_usm_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + std::complex* a, std::int64_t lda, float* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zgesvd_usm_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, - std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cheevd_usm_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, float *w, std::complex *scratchpad, + std::complex* a, std::int64_t lda, double* s, + std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cheevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies); - sycl::event (*zheevd_usm_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, double *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zheevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chegvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - float *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*chegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhegvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - double *w, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zhegvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chetrd_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*chetrd_usm_sycl)(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhetrd_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zhetrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*chetrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zhetrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgbr_usm_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgbr_usm_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_usm_sycl)(sycl::queue 
&queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, + const std::vector& dependencies); + sycl::event (*chetrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zhetrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dorgtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*sormtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, float *a, std::int64_t lda, - float *tau, float *c, std::int64_t ldc, float *scratchpad, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*dormtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, double *a, std::int64_t lda, - double *tau, double *c, std::int64_t ldc, double *scratchpad, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormrq_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*sormrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, + std::int64_t k, float* a, 
std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormrq_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*dormrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *c, std::int64_t ldc, double *scratchpad, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dormqr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*dormqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, - double *c, std::int64_t ldc, double *scratchpad, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* c, std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sormqr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*sormqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + const std::vector& dependencies); + sycl::event (*spotrf_usm_sycl)(sycl::queue& queue, 
oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotri_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotri_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *scratchpad, + const std::vector& 
dependencies); + sycl::event (*dpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotri_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotri_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotri_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, double* b, + 
std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsyevd_usm_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *w, double *scratchpad, + const std::vector& dependencies); + sycl::event (*cpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zpotrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsyevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssyevd_usm_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *w, float *scratchpad, + const std::vector& dependencies); + sycl::event (*ssyevd_usm_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo 
uplo, std::int64_t n, float* a, + std::int64_t lda, float* w, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsygvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssygvd_usm_sycl)(sycl::queue &queue, std::int64_t itype, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsytrd_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssytrd_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ssytrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dsytrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*csytrf_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zsytrf_usm_sycl)(sycl::queue &queue, 
oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ctrtrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + const std::vector& dependencies); + sycl::event (*dsygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssygvd_usm_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsytrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssytrd_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ssytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dsytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*csytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + 
std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zsytrf_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*ctrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dtrtrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dtrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - double *b, std::int64_t ldb, double *scratchpad, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*strtrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + const std::vector& dependencies); + sycl::event (*strtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - float *b, std::int64_t ldb, float *scratchpad, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*ztrtrs_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + 
const std::vector& dependencies); + sycl::event (*ztrtrs_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungbr_usm_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungbr_usm_sycl)(sycl::queue &queue, oneapi::mkl::generate vec, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cungbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zungbr_usm_sycl)(sycl::queue& queue, oneapi::mkl::generate vec, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cungqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, 
std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungqr_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cungtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmrq_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*cunmrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + 
std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmrq_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*zunmrq_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmqr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*cunmqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmqr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); + sycl::event (*zunmqr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, - std::int64_t ldc, std::complex *scratchpad, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cunmtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + const std::vector& dependencies); 
+ sycl::event (*cunmtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zunmtr_usm_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*zunmtr_usm_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, - std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - void (*sgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + void (*sgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + void 
(*dgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*cgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgeqrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*zgeqrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, + void (*sgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, sycl::buffer &a, + void (*dgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t 
batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*cgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetri_batch_sycl)(sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*zgetri_batch_sycl)(sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, + void (*sgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dgetrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, 
sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*cgetrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*cgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + void (*zgetrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*sgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, 
sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*cgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zgetrf_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, + void (*zgetrf_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*sorgqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*sorgqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& 
scratchpad, std::int64_t scratchpad_size); - void (*dorgqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dorgqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*spotrf_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*spotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*dpotrf_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, + void (*dpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size); - void (*cpotrf_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + void (*cpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zpotrf_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, + void 
(*zpotrf_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*spotrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + void (*spotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*dpotrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*dpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size); - void (*cpotrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + void (*cpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zpotrs_batch_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, 
- std::int64_t nrhs, sycl::buffer> &a, + void (*zpotrs_batch_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &b, std::int64_t ldb, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*cungqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*cungqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - void (*zungqr_batch_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, + void (*zungqr_batch_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); - sycl::event (*sgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + sycl::event (*sgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_batch_usm_sycl)(sycl::queue &queue, 
std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*cgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*zgeqrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, 
std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*cgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*zgetrf_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, float *a, + const std::vector& 
dependencies); + sycl::event (*sgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, float *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, double *a, + const std::vector& dependencies); + sycl::event (*dgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, double *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, std::complex *a, + const std::vector& dependencies); + sycl::event (*cgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_batch_usm_sycl)(sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, + const std::vector& dependencies); + sycl::event (*zgetri_batch_usm_sycl)(sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event 
(*sgetrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, + const std::vector& dependencies); + sycl::event (*sgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, float *b, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, + const std::vector& dependencies); + sycl::event (*dgetrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, double *b, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, double *scratchpad, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*cgetrs_batch_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, 
std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); sycl::event (*zgetrs_batch_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::complex *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, + const std::vector& dependencies); + sycl::event (*dorgqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies); - sycl::event (*spotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*spotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*cpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, + const std::vector& dependencies); + sycl::event (*zpotrf_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies); - sycl::event (*spotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, + const std::vector& dependencies); + sycl::event (*spotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, std::complex *a, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, 
std::complex *a, + const std::vector& dependencies); + sycl::event (*zpotrs_batch_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, - std::complex *b, std::int64_t ldb, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cungqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*cungqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_batch_usm_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, + const std::vector& dependencies); + sycl::event (*zungqr_batch_usm_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, - std::complex *scratchpad, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, 
std::int64_t *n, - double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgeqrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgeqrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, 
std::int64_t *n, - float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetrf_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetrf_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetrf_group_usm_sycl)(sycl::queue& queue, 
std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zgetri_group_usm_sycl)(sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, - 
std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zgetri_group_usm_sycl)(sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, + const std::vector& dependencies); + sycl::event (*sgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + const std::vector& dependencies); + sycl::event (*dgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cgetrs_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::transpose *trans, - std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::complex **b, - 
std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*cgetrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); sycl::event (*zgetrs_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, std::complex **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*sorgqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dorgqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, double **a, std::int64_t *lda, - double **tau, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, std::complex** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*sorgqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, 
+ float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dorgqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, double** a, std::int64_t* lda, + double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrf_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, float **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, double **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*cpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*spotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*cpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies); - sycl::event (*zpotrf_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::complex **a, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zpotrf_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*spotrs_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, float **a, - std::int64_t *lda, float **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*dpotrs_group_usm_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, double **a, - std::int64_t *lda, double **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); + sycl::event (*spotrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, + std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); + sycl::event (*dpotrs_group_usm_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, + std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies); sycl::event (*cpotrs_group_usm_sycl)( - 
sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies); + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies); sycl::event (*zpotrs_group_usm_sycl)( - sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies); - sycl::event (*cungqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies); + sycl::event (*cungqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); - sycl::event (*zungqr_group_usm_sycl)(sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::complex **a, - std::int64_t *lda, 
std::complex **tau, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, + const std::vector& dependencies); + sycl::event (*zungqr_group_usm_sycl)(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies); + const std::vector& dependencies); - std::int64_t (*sgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgebrd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgebrd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); 
- std::int64_t (*zgerqf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgerqf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgeqrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgeqrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + std::int64_t (*sgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + std::int64_t (*dgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + std::int64_t (*cgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t 
(*zgesvd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::jobsvd jobu, + std::int64_t (*zgesvd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); - std::int64_t (*sgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*dgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*cgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*zgetrf_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zgetrf_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda); - std::int64_t (*sgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*sgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*dgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*dgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*cgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*cgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*zgetri_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*zgetri_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda); - std::int64_t (*sgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t 
(*sgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t (*dgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t (*cgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::transpose trans, + std::int64_t (*zgetrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, + std::int64_t (*cheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, + std::int64_t (*zheevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*chegvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*chegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zhegvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*zhegvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t 
(*chetrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*chetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zhetrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*chetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zhetrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::generate vect, + std::int64_t (*sorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::generate vect, + std::int64_t (*dorgbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*sorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dorgtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*sorgqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*sorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, 
std::int64_t lda); - std::int64_t (*dorgqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*dorgqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*sormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*dormrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*sormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*dormqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*sormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*dormtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, 
std::int64_t lda, std::int64_t ldc); - std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*spotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zpotrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*spotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zpotrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*spotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + 
std::int64_t (*spotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zpotri_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*ssytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*csytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zsytrf_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, + std::int64_t (*ssyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dsyevd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::job jobz, + std::int64_t 
(*dsyevd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*ssygvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*ssygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dsygvd_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t itype, + std::int64_t (*dsygvd_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb); - std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*ssytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dsytrd_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*strtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dtrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*ctrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, 
std::int64_t lda, std::int64_t ldb); - std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*ztrtrs_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); - std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::generate vect, + std::int64_t (*cungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::generate vect, + std::int64_t (*zungbr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*cungqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*cungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*zungqr_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, std::int64_t n, + std::int64_t (*zungqr_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda); - std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zungtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda); - std::int64_t (*cunmrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*cunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t 
k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*zunmrq_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*cunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*zunmqr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc); - std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*cunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::side side, + std::int64_t (*zunmtr_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc); - std::int64_t (*sgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*dgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, 
std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*cgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*zgetrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*zgetrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*sgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*sgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*dgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*dgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*cgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*cgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); - std::int64_t (*zgetri_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t n, + std::int64_t (*zgetri_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size); std::int64_t (*sgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, 
std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*dgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*cgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); std::int64_t (*zgetrs_batch_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*sgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*dgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*cgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, 
std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*zgeqrf_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*zgeqrf_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*spotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zpotrf_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size); - std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*spotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*dpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, 
std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*cpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo uplo, + std::int64_t (*zpotrs_batch_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size); - std::int64_t (*sorgqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*sorgqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*dorgqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*dorgqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*cungqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*cungqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t (*zungqr_batch_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t m, + std::int64_t (*zungqr_batch_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size); - std::int64_t 
(*sgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t (*sgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zgetrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*zgetrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*sgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zgetri_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* 
group_sizes); + std::int64_t (*sgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*dgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*cgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*zgetri_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); std::int64_t (*sgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*dgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*cgetrs_group_scratchpad_size_sycl)( - sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); std::int64_t (*zgetrs_group_scratchpad_size_sycl)( - sycl::queue 
&queue, oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes); - std::int64_t (*sgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes); + std::int64_t (*sgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zgeqrf_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*zgeqrf_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*sorgqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dorgqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, 
std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*sorgqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*dorgqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*spotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*dpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*cpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zpotrf_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *lda, + std::int64_t* group_sizes); + std::int64_t (*zpotrf_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue &queue, 
oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*spotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*dpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*cpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, - std::int64_t *lda, std::int64_t *ldb, + std::int64_t* group_sizes); + std::int64_t (*zpotrs_group_scratchpad_size_sycl)(sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*cungqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); - std::int64_t (*zungqr_group_scratchpad_size_sycl)(sycl::queue &queue, std::int64_t *m, - std::int64_t *n, 
std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes); + std::int64_t* group_sizes); + std::int64_t (*cungqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + std::int64_t (*zungqr_group_scratchpad_size_sycl)(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); } lapack_function_table_t; diff --git a/src/lapack/lapack_loader.cpp b/src/lapack/lapack_loader.cpp index f26e5f5ad..f558cca09 100644 --- a/src/lapack/lapack_loader.cpp +++ b/src/lapack/lapack_loader.cpp @@ -30,2063 +30,2063 @@ namespace detail { static oneapi::mkl::detail::table_initializer function_tables; -void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tauq, sycl::buffer &taup, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
function_tables[{ libkey, queue }].dgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tauq, sycl::buffer &taup, sycl::buffer &scratchpad, +void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tauq, - sycl::buffer> &taup, sycl::buffer> &scratchpad, +void gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgebrd_sycl(queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size); } -void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t 
lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgerqf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, 
std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgeqrf_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size); } -void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::mkl::device libkey, 
sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrf_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } 
-void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer &scratchpad, +void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetri_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, sycl::buffer> &b, - std::int64_t ldb, sycl::buffer> &scratchpad, +void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, 
scratchpad, scratchpad_size); } -void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue 
}].zgetrs_sycl(queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size); } -void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &s, sycl::buffer &u, std::int64_t ldu, - sycl::buffer &vt, std::int64_t ldvt, sycl::buffer &scratchpad, +void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - 
sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +void gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &s, - sycl::buffer> &u, std::int64_t ldu, - sycl::buffer> &vt, std::int64_t ldvt, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgesvd_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size); } -void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &w, sycl::buffer> &scratchpad, +void heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer> &a, - std::int64_t lda, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void heevd(oneapi::mkl::device libkey, sycl::queue& queue, 
oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zheevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +void hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhegvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t 
lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhetrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].chetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zhetrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, 
oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, std::int64_t lda, - sycl::buffer &tau, sycl::buffer &scratchpad, +void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer &a, - std::int64_t lda, sycl::buffer &tau, sycl::buffer &scratchpad, +void orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, 
sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormtr(oneapi::mkl::device libkey, 
sycl::queue &queue, oneapi::mkl::side side, +void ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void 
ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer &a, std::int64_t lda, sycl::buffer &tau, - sycl::buffer &c, std::int64_t ldc, sycl::buffer &scratchpad, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sormqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { function_tables[{ libkey, queue }].dpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrf_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &scratchpad, +void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotri_sycl(queue, 
uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotri_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size); } -void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, sycl::buffer &b, - std::int64_t ldb, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo 
uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - std::int64_t nrhs, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrs_sycl(queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, +void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { 
function_tables[{ libkey, queue }].dsyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, std::int64_t lda, - sycl::buffer &w, sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssyevd_sycl(queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size); } -void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer &a, - std::int64_t lda, sycl::buffer &b, std::int64_t ldb, sycl::buffer &w, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) 
{ function_tables[{ libkey, queue }].ssygvd_sycl(queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size); } -void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, - sycl::buffer &e, sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &d, sycl::buffer &e, - sycl::buffer &tau, sycl::buffer &scratchpad, +void sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssytrd_sycl(queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); } -void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ssytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer &a, std::int64_t lda, sycl::buffer &ipiv, - 
sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, sycl::buffer &ipiv, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].csytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer &ipiv, sycl::buffer> &scratchpad, +void sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zsytrf_sycl(queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); } -void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ 
libkey, queue }].ctrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dtrtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer &a, std::int64_t lda, sycl::buffer &b, std::int64_t ldb, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].strtrs_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +void trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &b, std::int64_t ldb, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].ztrtrs_sycl(queue, 
uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size); } -void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer> &a, - std::int64_t lda, sycl::buffer> &tau, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { +void ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungbr_sycl(queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t 
lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungqr_sycl(queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &scratchpad, +void ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungtr_sycl(queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); } -void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmrq_sycl(queue, 
side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmrq_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmqr_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmqr_sycl(queue, side, trans, m, n, k, a, 
lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -void unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +void unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, - sycl::buffer> &tau, sycl::buffer> &c, - std::int64_t ldc, sycl::buffer> &scratchpad, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zunmtr_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size); } -sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ 
libkey, queue }].cgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *d, double *e, double *tauq, double *taup, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* d, double* e, double* tauq, double* taup, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *d, float *e, float *tauq, float *taup, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* d, float* e, float* tauq, float* taup, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tauq, std::complex *taup, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gebrd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tauq, std::complex* taup, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgebrd_usm_sycl( queue, m, n, a, lda, d, e, tauq, taup, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, 
dependencies); } -sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gerqf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgerqf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, 
float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgeqrf_usm_sycl(queue, m, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - double *a, std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(oneapi::mkl::device 
libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrf_usm_sycl(queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + 
const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t *ipiv, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getri(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_usm_sycl(queue, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - 
std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t *ipiv, double *b, std::int64_t ldb, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t* ipiv, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, std::int64_t *ipiv, - float *b, std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, + float* b, std::int64_t ldb, float* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t *ipiv, std::complex *b, std::int64_t ldb, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrs_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, double *a, - std::int64_t lda, double *s, double *u, std::int64_t ldu, double *vt, - std::int64_t ldvt, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, 
float *a, - std::int64_t lda, float *s, float *u, std::int64_t ldu, float *vt, - std::int64_t ldvt, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* s, float* u, std::int64_t ldu, float* vt, + std::int64_t ldvt, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, - oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex *a, - std::int64_t lda, float *s, std::complex *u, std::int64_t ldu, - std::complex *vt, std::int64_t ldvt, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, + oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, std::int64_t ldu, + std::complex* vt, std::int64_t ldvt, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::jobsvd jobu, +sycl::event gesvd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, - std::complex *a, std::int64_t lda, double *s, std::complex *u, - std::int64_t ldu, std::complex *vt, std::int64_t ldvt, - std::complex *scratchpad, std::int64_t 
scratchpad_size, - const std::vector &dependencies) { + std::complex* a, std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgesvd_usm_sycl(queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - float *w, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + float* w, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cheevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event heevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, std::complex *a, std::int64_t lda, - double *w, std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event heevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + double* w, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zheevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, 
oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, float *w, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].chegvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, +sycl::event hegvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, - std::complex *a, std::int64_t lda, std::complex *b, - std::int64_t ldb, double *w, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhegvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, float *d, float *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].chetrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } 
-sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, double *d, double *e, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event hetrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhetrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].chetrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event hetrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zhetrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, 
dependencies); } -sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - float *tau, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - double *tau, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, 
dependencies); } -sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *tau, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *tau, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event orgtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormtr(oneapi::mkl::device libkey, 
sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, float *tau, float *c, - std::int64_t ldc, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, double *tau, double *c, - std::int64_t ldc, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].sormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - double *a, std::int64_t lda, double *tau, double *c, std::int64_t ldc, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + double* a, std::int64_t lda, double* tau, double* c, std::int64_t ldc, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event ormqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - float *a, std::int64_t lda, float *tau, float *c, std::int64_t ldc, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector 
&dependencies) { + float* a, std::int64_t lda, float* tau, float* c, std::int64_t ldc, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sormqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].cpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - 
std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potri(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potri(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotri_usm_sycl(queue, uplo, n, a, lda, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, float *b, - std::int64_t ldb, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, double *b, - std::int64_t ldb, double *scratchpad, std::int64_t scratchpad_size, - const 
std::vector &dependencies) { +sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event potrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - 
oneapi::mkl::uplo uplo, std::int64_t n, double *a, std::int64_t lda, double *w, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsyevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event syevd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::job jobz, - oneapi::mkl::uplo uplo, std::int64_t n, float *a, std::int64_t lda, float *w, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event syevd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, + oneapi::mkl::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* w, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssyevd_usm_sycl( queue, jobz, uplo, n, a, lda, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double *a, - std::int64_t lda, double *b, std::int64_t ldb, double *w, double *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsygvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } 
-sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t itype, - oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float *a, - std::int64_t lda, float *b, std::int64_t ldb, float *w, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sygvd(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, + oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssygvd_usm_sycl( queue, itype, jobz, uplo, n, a, lda, b, ldb, w, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, double *d, double *e, double *tau, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsytrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, float *d, float *e, float *tau, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrd(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].ssytrd_usm_sycl( queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, std::int64_t *ipiv, float *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ssytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, std::int64_t *ipiv, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dsytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].csytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, 
dependencies); } -sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, std::int64_t *ipiv, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event sytrf(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zsytrf_usm_sycl( queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ctrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, double *a, std::int64_t lda, double *b, std::int64_t ldb, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t nrhs, double* a, std::int64_t lda, double* b, std::int64_t ldb, + double* scratchpad, std::int64_t scratchpad_size, 
+ const std::vector& dependencies) { return function_tables[{ libkey, queue }].dtrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, float *a, std::int64_t lda, float *b, std::int64_t ldb, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t nrhs, float* a, std::int64_t lda, float* b, std::int64_t ldb, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].strtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, +sycl::event trtrs(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, - std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::complex *b, std::int64_t ldb, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].ztrtrs_usm_sycl(queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) 
{ +sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::generate vec, - std::int64_t m, std::int64_t n, std::int64_t k, std::complex *a, - std::int64_t lda, std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungbr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungbr_usm_sycl( queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, std::complex *a, std::int64_t lda, - std::complex *tau, 
std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungqr(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_usm_sycl( queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { +sycl::event ungtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungtr_usm_sycl( queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose 
trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].cunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmrq(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmrq_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { 
return function_tables[{ libkey, queue }].cunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmqr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, - std::complex *a, std::int64_t lda, std::complex *tau, - std::complex *c, std::int64_t ldc, std::complex *scratchpad, - std::int64_t scratchpad_size, const std::vector &dependencies) { + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmqr_usm_sycl(queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::side side, +sycl::event unmtr(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, 
std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::complex *tau, std::complex *c, std::int64_t ldc, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zunmtr_usm_sycl(queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, scratchpad_size, dependencies); } -void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, 
scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer> &tau, std::int64_t stride_tau, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgeqrf_batch_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetri_batch_sycl( 
queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetri_batch_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetri_batch_sycl( 
queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &ipiv, std::int64_t stride_ipiv, - sycl::buffer &b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, 
oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, +void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer &ipiv, - std::int64_t stride_ipiv, sycl::buffer> &b, std::int64_t ldb, +void getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrs_batch_sycl(queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, 
+void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer &scratchpad, +void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - sycl::buffer> &a, std::int64_t lda, std::int64_t stride_a, - 
sycl::buffer &ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, sycl::buffer> &scratchpad, +void getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zgetrf_batch_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size); } -void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].sorgqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - sycl::buffer &tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer &scratchpad, std::int64_t scratchpad_size) { +void orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dorgqr_batch_sycl( queue, m, n, k, a, lda, stride_a, 
tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer &a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, sycl::buffer &scratchpad, +void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, +void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, sycl::buffer> &a, std::int64_t lda, 
+void potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrf_batch_sycl(queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].spotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer &b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, sycl::buffer &scratchpad, +void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].dpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, 
scratchpad_size); } -void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, +void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, sycl::buffer> &a, - std::int64_t lda, std::int64_t stride_a, sycl::buffer> &b, +void potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zpotrs_batch_sycl(queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> 
&scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].cungqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -void ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, - std::int64_t k, sycl::buffer> &a, std::int64_t lda, - std::int64_t stride_a, sycl::buffer> &tau, +void ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, std::int64_t stride_tau, std::int64_t batch_size, - sycl::buffer> &scratchpad, std::int64_t scratchpad_size) { + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { function_tables[{ libkey, queue }].zungqr_batch_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - float *tau, std::int64_t stride_tau, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, + float* tau, std::int64_t stride_tau, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - double *tau, std::int64_t stride_tau, std::int64_t batch_size, - double 
*scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + double* tau, std::int64_t stride_tau, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { 
return function_tables[{ libkey, queue }].zgeqrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t 
stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::int64_t *ipiv, std::int64_t stride_ipiv, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrf_batch_usm_sycl( queue, m, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, float *scratchpad, +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return 
function_tables[{ libkey, queue }].sgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, std::int64_t batch_size, double *scratchpad, +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - 
const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_batch_usm_sycl( queue, n, a, lda, stride_a, ipiv, stride_ipiv, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, double *a, - std::int64_t lda, std::int64_t stride_a, std::int64_t *ipiv, - std::int64_t stride_ipiv, double *b, std::int64_t ldb, - std::int64_t stride_b, std::int64_t batch_size, double *scratchpad, +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose trans, std::int64_t n, 
std::int64_t nrhs, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, - std::complex *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t *ipiv, std::int64_t stride_ipiv, std::complex *b, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, 
std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrs_batch_usm_sycl( queue, trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, float *a, std::int64_t lda, - std::int64_t stride_a, float *tau, std::int64_t stride_tau, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, double *a, std::int64_t lda, - std::int64_t stride_a, double *tau, std::int64_t stride_tau, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, 
scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, float *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, double *a, std::int64_t lda, std::int64_t stride_a, - std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies) { return function_tables[{ libkey, queue }].cpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::complex *a, std::int64_t lda, +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_batch_usm_sycl( queue, uplo, n, a, lda, stride_a, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, float *a, std::int64_t lda, - std::int64_t stride_a, float *b, std::int64_t ldb, std::int64_t stride_b, - std::int64_t batch_size, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, float* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, double *a, std::int64_t lda, - std::int64_t stride_a, double *b, std::int64_t ldb, std::int64_t stride_b, - 
std::int64_t batch_size, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, double* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *b, std::int64_t ldb, +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, - std::int64_t n, std::int64_t nrhs, std::complex *a, - std::int64_t lda, std::int64_t stride_a, std::complex *b, +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t 
batch_size, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_batch_usm_sycl( queue, uplo, n, nrhs, a, lda, stride_a, b, ldb, stride_b, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, - std::int64_t n, std::int64_t k, std::complex *a, std::int64_t lda, - std::int64_t stride_a, std::complex *tau, std::int64_t stride_tau, - std::int64_t batch_size, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_batch_usm_sycl( queue, m, n, k, a, lda, stride_a, tau, 
stride_tau, batch_size, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, double **tau, - std::int64_t group_count, std::int64_t *group_sizes, double *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* 
scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event geqrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgeqrf_group_usm_sycl( queue, m, n, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, float **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, double **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, 
double *scratchpad, +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t **ipiv, std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue 
}].zgetrf_group_usm_sycl( queue, m, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, float **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, double **a, - std::int64_t *lda, std::int64_t **ipiv, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex** a, 
std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *n, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getri_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetri_group_usm_sycl( queue, n, a, lda, ipiv, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - float **a, std::int64_t *lda, std::int64_t **ipiv, float **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, std::int64_t** ipiv, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, 
scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - double **a, std::int64_t *lda, std::int64_t **ipiv, double **b, - std::int64_t *ldb, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, std::int64_t** ipiv, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, 
std::int64_t *n, std::int64_t *nrhs, - std::complex **a, std::int64_t *lda, std::int64_t **ipiv, - std::complex **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event getrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zgetrs_group_usm_sycl( queue, trans, n, nrhs, a, lda, ipiv, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, float **a, std::int64_t *lda, float **tau, - std::int64_t group_count, std::int64_t *group_sizes, float *scratchpad, +sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].sorgqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, double **a, std::int64_t *lda, - double **tau, std::int64_t group_count, std::int64_t *group_sizes, - double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event orgqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + 
std::int64_t* n, std::int64_t* k, double** a, std::int64_t* lda, + double** tau, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dorgqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, float **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, float** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, double **a, std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, double** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::complex 
**a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::complex **a, std::int64_t *lda, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrf_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrf_group_usm_sycl( queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, float **a, std::int64_t *lda, - float **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, float *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, float** a, std::int64_t* lda, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* 
group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].spotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, double **a, std::int64_t *lda, - double **b, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes, double *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, double** a, std::int64_t* lda, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].dpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, 
scratchpad, scratchpad_size, dependencies); } -sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, - std::int64_t *n, std::int64_t *nrhs, std::complex **a, - std::int64_t *lda, std::complex **b, std::int64_t *ldb, - std::int64_t group_count, std::int64_t *group_sizes, - std::complex *scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { +sycl::event potrs_batch(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zpotrs_group_usm_sycl( queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].cungqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } -sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *k, std::complex **a, - std::int64_t *lda, std::complex **tau, std::int64_t group_count, - std::int64_t *group_sizes, 
std::complex *scratchpad, +sycl::event ungqr_batch(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, - const std::vector &dependencies) { + const std::vector& dependencies) { return function_tables[{ libkey, queue }].zungqr_group_usm_sycl( queue, m, n, k, a, lda, tau, group_count, group_sizes, scratchpad, scratchpad_size, dependencies); } template <> -std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gebrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gebrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgebrd_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { 
return function_tables[{ libkey, queue }].sgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gerqf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t gerqf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgerqf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t geqrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t geqrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, 
std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgeqrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { @@ -2094,7 +2094,7 @@ std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queu lda, ldu, ldvt); } template <> -std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { @@ -2103,7 +2103,7 @@ std::int64_t gesvd_scratchpad_size(oneapi::mkl::device libkey, sycl::que } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -2113,7 +2113,7 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, std::int64_t m, std::int64_t n, std::int64_t lda, @@ -2122,58 +2122,58 @@ std::int64_t gesvd_scratchpad_size>(oneapi::mkl::device lib lda, ldu, ldvt); } template <> -std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgetrf_scratchpad_size_sycl(queue, m, n, lda); } 
template <> -std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> std::int64_t getrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgetrf_scratchpad_size_sycl(queue, m, n, lda); } template <> -std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sgetri_scratchpad_size_sycl(queue, n, lda); } template <> -std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cgetri_scratchpad_size_sycl(queue, n, lda); } template <> std::int64_t getri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t n, + sycl::queue& queue, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zgetri_scratchpad_size_sycl(queue, 
n, lda); } template <> -std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].sgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, lda, ldb); } template <> -std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].dgetrs_scratchpad_size_sycl(queue, trans, n, nrhs, @@ -2181,7 +2181,7 @@ std::int64_t getrs_scratchpad_size(oneapi::mkl::device libkey, sycl::que } template <> std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2190,7 +2190,7 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t getrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2199,7 +2199,7 @@ std::int64_t getrs_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::job jobz, + sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cheevd_scratchpad_size_sycl(queue, jobz, uplo, n, @@ -2207,7 +2207,7 @@ std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t heevd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, 
oneapi::mkl::job jobz, + sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zheevd_scratchpad_size_sycl(queue, jobz, uplo, n, @@ -2215,7 +2215,7 @@ std::int64_t heevd_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2224,7 +2224,7 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t itype, + sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2233,68 +2233,68 @@ std::int64_t hegvd_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].chetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrd_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zhetrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].chetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t hetrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, 
oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zhetrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); } template <> -std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgbr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgbr_scratchpad_size_sycl(queue, vect, m, n, k, lda); } template <> -std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].sorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> -std::int64_t 
orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].dorgqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> -std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2302,7 +2302,7 @@ std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queu k, lda, ldc); } template <> -std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2310,7 +2310,7 @@ std::int64_t ormrq_scratchpad_size(oneapi::mkl::device libkey, sycl::que k, lda, ldc); } template <> -std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2318,7 +2318,7 @@ std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queu k, lda, ldc); } template <> -std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { @@ -2326,7 +2326,7 @@ std::int64_t 
ormqr_scratchpad_size(oneapi::mkl::device libkey, sycl::que k, lda, ldc); } template <> -std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2334,7 +2334,7 @@ std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queu m, n, lda, ldc); } template <> -std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldc) { @@ -2342,38 +2342,38 @@ std::int64_t ormtr_scratchpad_size(oneapi::mkl::device libkey, sycl::que m, n, lda, ldc); } template <> -std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].spotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potrf_scratchpad_size>(oneapi::mkl::device 
libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zpotrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].spotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].dpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, @@ -2381,7 +2381,7 @@ std::int64_t potrs_scratchpad_size(oneapi::mkl::device libkey, sycl::que } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].cpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, @@ -2389,76 +2389,76 @@ std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t potrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { return function_tables[{ libkey, queue }].zpotrs_scratchpad_size_sycl(queue, uplo, n, nrhs, lda, ldb); } template <> -std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t 
potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].spotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potri_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t potri_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zpotri_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrf_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, 
std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].csytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t sytrf_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].zsytrf_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); } template <> -std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t syevd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsyevd_scratchpad_size_sycl(queue, jobz, uplo, n, lda); } template <> -std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2466,7 +2466,7 @@ std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queu n, lda, ldb); } template <> -std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t itype, oneapi::mkl::job jobz, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t ldb) { @@ -2474,19 +2474,19 @@ std::int64_t sygvd_scratchpad_size(oneapi::mkl::device libkey, sycl::que n, lda, ldb); } template <> 
-std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].ssytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t sytrd_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].dsytrd_scratchpad_size_sycl(queue, uplo, n, lda); } template <> -std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2494,7 +2494,7 @@ std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queu n, nrhs, lda, ldb); } template <> -std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t ldb) { @@ -2503,7 +2503,7 @@ std::int64_t trtrs_scratchpad_size(oneapi::mkl::device libkey, sycl::que } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -2513,7 +2513,7 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, 
oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, @@ -2523,7 +2523,7 @@ std::int64_t trtrs_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { @@ -2532,7 +2532,7 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, + sycl::queue& queue, oneapi::mkl::generate vect, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { @@ -2541,33 +2541,33 @@ std::int64_t ungbr_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t ungqr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].cungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungqr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t m, + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda) { return function_tables[{ libkey, queue }].zungqr_scratchpad_size_sycl(queue, m, n, k, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, queue }].cungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t ungtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::uplo uplo, + sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda) { return function_tables[{ libkey, 
queue }].zungtr_scratchpad_size_sycl(queue, uplo, n, lda); } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2577,7 +2577,7 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2587,7 +2587,7 @@ std::int64_t unmrq_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2597,7 +2597,7 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, @@ -2607,7 +2607,7 @@ std::int64_t unmqr_scratchpad_size>(oneapi::mkl::device lib } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -2617,7 +2617,7 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libk } template <> std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, oneapi::mkl::side side, + sycl::queue& queue, oneapi::mkl::side side, oneapi::mkl::uplo uplo, 
oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, @@ -2626,7 +2626,7 @@ std::int64_t unmtr_scratchpad_size>(oneapi::mkl::device lib m, n, lda, ldc); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2634,7 +2634,7 @@ std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2643,20 +2643,20 @@ std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t 
getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2664,7 +2664,7 @@ std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { @@ -2673,20 +2673,20 @@ std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetri_batch_scratchpad_size_sycl( queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> std::int64_t getri_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t n, std::int64_t lda, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetri_batch_scratchpad_size_sycl( queue, n, lda, stride_a, stride_ipiv, batch_size); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t 
stride_ipiv, @@ -2696,7 +2696,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, @@ -2707,7 +2707,7 @@ std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgetrs_batch_scratchpad_size_sycl( @@ -2715,14 +2715,14 @@ std::int64_t getrs_batch_scratchpad_size>( } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose trans, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose trans, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgetrs_batch_scratchpad_size_sycl( queue, trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t 
stride_tau, std::int64_t batch_size) { @@ -2730,7 +2730,7 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2739,20 +2739,20 @@ std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cgeqrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t geqrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zgeqrf_batch_scratchpad_size_sycl( queue, m, n, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { @@ -2760,7 +2760,7 @@ std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t 
potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { @@ -2769,20 +2769,20 @@ std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cpotrf_batch_scratchpad_size_sycl( queue, uplo, n, lda, stride_a, batch_size); } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zpotrf_batch_scratchpad_size_sycl( queue, uplo, n, lda, stride_a, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, @@ -2791,7 +2791,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, 
std::int64_t ldb, @@ -2801,7 +2801,7 @@ std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cpotrs_batch_scratchpad_size_sycl( @@ -2809,14 +2809,14 @@ std::int64_t potrs_batch_scratchpad_size>( } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo uplo, std::int64_t n, + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo uplo, std::int64_t n, std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zpotrs_batch_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2824,7 +2824,7 @@ std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, +std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { @@ -2833,245 +2833,245 @@ std::int64_t 
orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, syc } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].cungqr_batch_scratchpad_size_sycl( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t m, std::int64_t n, std::int64_t k, + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { return function_tables[{ libkey, queue }].zungqr_batch_scratchpad_size_sycl( queue, m, n, k, lda, stride_a, stride_tau, batch_size); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue 
}].dgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t getrf_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *n, std::int64_t *lda, +std::int64_t getri_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> std::int64_t 
getri_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> std::int64_t getri_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *n, - std::int64_t *lda, + sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetri_group_scratchpad_size_sycl( queue, n, lda, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t getrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { 
return function_tables[{ libkey, queue }].dgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t getrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::transpose *trans, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgetrs_group_scratchpad_size_sycl( queue, trans, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *lda, +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue 
&queue, - std::int64_t *m, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t geqrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> std::int64_t geqrf_batch_scratchpad_size>(oneapi::mkl::device libkey, - sycl::queue &queue, std::int64_t *m, - std::int64_t *n, std::int64_t *lda, + sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, std::int64_t group_count, - std::int64_t *group_sizes) { + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zgeqrf_group_scratchpad_size_sycl( queue, m, n, lda, group_count, group_sizes); } template <> -std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].sorgqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t 
orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - std::int64_t *m, std::int64_t *n, std::int64_t *k, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t orgqr_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dorgqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].spotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrf_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + 
std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> std::int64_t potrf_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zpotrf_group_scratchpad_size_sycl( queue, uplo, n, lda, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].spotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> -std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue &queue, - oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, - std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { +std::int64_t potrs_batch_scratchpad_size(oneapi::mkl::device libkey, sycl::queue& queue, + oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].dpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, 
ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t potrs_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, oneapi::mkl::uplo *uplo, std::int64_t *n, - std::int64_t *nrhs, std::int64_t *lda, std::int64_t *ldb, std::int64_t group_count, - std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, oneapi::mkl::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zpotrs_group_scratchpad_size_sycl( queue, uplo, n, nrhs, lda, ldb, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].cungqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } template <> std::int64_t ungqr_batch_scratchpad_size>( - oneapi::mkl::device libkey, sycl::queue &queue, std::int64_t *m, std::int64_t *n, - std::int64_t *k, std::int64_t *lda, std::int64_t group_count, 
std::int64_t *group_sizes) { + oneapi::mkl::device libkey, sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes) { return function_tables[{ libkey, queue }].zungqr_group_scratchpad_size_sycl( queue, m, n, k, lda, group_count, group_sizes); } diff --git a/src/rng/backends/curand/curand_task.hpp b/src/rng/backends/curand/curand_task.hpp index 240ced805..0cd46b203 100644 --- a/src/rng/backends/curand/curand_task.hpp +++ b/src/rng/backends/curand/curand_task.hpp @@ -15,18 +15,18 @@ namespace rng { namespace curand { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { curandStatus_t status; CURAND_CALL(curandSetStream, status, e, ih.get_native_queue()); auto r_ptr = - reinterpret_cast(ih.get_native_mem(acc)); + reinterpret_cast(ih.get_native_mem(acc)); f(r_ptr); }); } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { curandStatus_t status; CURAND_CALL(curandSetStream, status, e, ih.get_native_queue()); @@ -35,16 +35,16 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #else template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif curandStatus_t status; auto stream = ih.get_native_queue(); CURAND_CALL(curandSetStream, status, e, stream); - auto r_ptr = reinterpret_cast( + auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); 
f(r_ptr); #ifndef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND @@ -55,9 +55,9 @@ static inline void host_task_internal(H &cgh, A acc, E e, F f) { } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif @@ -73,12 +73,12 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #endif template -static inline void onemkl_curand_host_task(H &cgh, A acc, E e, F f) { +static inline void onemkl_curand_host_task(H& cgh, A acc, E e, F f) { host_task_internal(cgh, acc, e, f); } template -static inline void onemkl_curand_host_task(H &cgh, Engine e, F f) { +static inline void onemkl_curand_host_task(H& cgh, Engine e, F f) { host_task_internal(cgh, e, f); } diff --git a/src/rng/backends/mklcpu/cpu_common.hpp b/src/rng/backends/mklcpu/cpu_common.hpp index cbd6cae59..a65338c91 100644 --- a/src/rng/backends/mklcpu/cpu_common.hpp +++ b/src/rng/backends/mklcpu/cpu_common.hpp @@ -34,19 +34,19 @@ namespace mklcpu { // host_task automatically uses run_on_host_intel if it is supported by the // compiler. Otherwise, it falls back to single_task. 
template -static inline auto host_task_internal(H &cgh, F f, int) -> decltype(cgh.host_task(f)) { +static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) { return cgh.host_task(f); } template -static inline void host_task_internal(H &cgh, F f, long) { +static inline void host_task_internal(H& cgh, F f, long) { #ifndef __SYCL_DEVICE_ONLY__ cgh.template single_task(f); #endif } template -static inline void host_task(H &cgh, F f) { +static inline void host_task(H& cgh, F f) { (void)host_task_internal(cgh, f, 0); } @@ -57,7 +57,7 @@ template class kernel_name_usm {}; template -typename Acc::value_type *get_raw_ptr(Acc acc) { +typename Acc::value_type* get_raw_ptr(Acc acc) { // Workaround for AdaptiveCPP, as they do not yet support the get_multi_ptr function #ifndef __HIPSYCL__ return acc.template get_multi_ptr().get_raw(); diff --git a/src/rng/backends/rocrand/rocrand_task.hpp b/src/rng/backends/rocrand/rocrand_task.hpp index bad40a9e5..a3e5e375e 100644 --- a/src/rng/backends/rocrand/rocrand_task.hpp +++ b/src/rng/backends/rocrand/rocrand_task.hpp @@ -15,18 +15,18 @@ namespace rng { namespace rocrand { #ifdef __HIPSYCL__ template -static inline void host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { rocrand_status status; ROCRAND_CALL(rocrand_set_stream, status, e, ih.get_native_queue()); auto r_ptr = - reinterpret_cast(ih.get_native_mem(acc)); + reinterpret_cast(ih.get_native_mem(acc)); f(r_ptr); }); } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { cgh.hipSYCL_enqueue_custom_operation([=](sycl::interop_handle ih) { rocrand_status status; ROCRAND_CALL(rocrand_set_stream, status, e, ih.get_native_queue()); @@ -35,16 +35,16 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #else template -static inline void 
host_task_internal(H &cgh, A acc, E e, F f) { +static inline void host_task_internal(H& cgh, A acc, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif rocrand_status status; auto stream = ih.get_native_queue(); ROCRAND_CALL(rocrand_set_stream, status, e, stream); - auto r_ptr = reinterpret_cast( + auto r_ptr = reinterpret_cast( ih.get_native_mem(acc)); f(r_ptr); #ifndef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND @@ -55,9 +55,9 @@ static inline void host_task_internal(H &cgh, A acc, E e, F f) { } template -static inline void host_task_internal(H &cgh, E e, F f) { +static inline void host_task_internal(H& cgh, E e, F f) { #ifdef SYCL_EXT_ONEAPI_ENQUEUE_NATIVE_COMMAND - cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih){ + cgh.ext_codeplay_enqueue_native_command([=](sycl::interop_handle ih) { #else cgh.host_task([=](sycl::interop_handle ih) { #endif @@ -73,12 +73,12 @@ static inline void host_task_internal(H &cgh, E e, F f) { } #endif template -static inline void onemkl_rocrand_host_task(H &cgh, A acc, E e, F f) { +static inline void onemkl_rocrand_host_task(H& cgh, A acc, E e, F f) { host_task_internal(cgh, acc, e, f); } template -static inline void onemkl_rocrand_host_task(H &cgh, Engine e, F f) { +static inline void onemkl_rocrand_host_task(H& cgh, Engine e, F f) { host_task_internal(cgh, e, f); } diff --git a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx index 3ae84ca64..1e4ab95f1 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_handles.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_handles.cxx @@ -19,22 +19,22 @@ // Dense vector template -void init_dense_vector(sycl::queue & /*queue*/, - oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, +void 
init_dense_vector(sycl::queue& /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t* p_dvhandle, std::int64_t size, sycl::buffer val) { *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); } template -void init_dense_vector(sycl::queue & /*queue*/, - oneapi::mkl::sparse::dense_vector_handle_t *p_dvhandle, std::int64_t size, - fpType *val) { +void init_dense_vector(sycl::queue& /*queue*/, + oneapi::mkl::sparse::dense_vector_handle_t* p_dvhandle, std::int64_t size, + fpType* val) { *p_dvhandle = new oneapi::mkl::sparse::dense_vector_handle(val, size); } template -void check_can_reset_value_handle(const std::string &function_name, - InternalHandleT *internal_handle, bool expect_buffer) { +void check_can_reset_value_handle(const std::string& function_name, + InternalHandleT* internal_handle, bool expect_buffer) { if (internal_handle->get_value_type() != detail::get_data_type()) { throw oneapi::mkl::invalid_argument( "sparse_blas", function_name, @@ -49,7 +49,7 @@ void check_can_reset_value_handle(const std::string &function_name, } template -void set_dense_vector_data(sycl::queue & /*queue*/, +void set_dense_vector_data(sycl::queue& /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, sycl::buffer val) { check_can_reset_value_handle(__func__, dvhandle, true); @@ -58,9 +58,9 @@ void set_dense_vector_data(sycl::queue & /*queue*/, } template -void set_dense_vector_data(sycl::queue & /*queue*/, +void set_dense_vector_data(sycl::queue& /*queue*/, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, std::int64_t size, - fpType *val) { + fpType* val) { check_can_reset_value_handle(__func__, dvhandle, false); dvhandle->size = size; dvhandle->set_usm_ptr(val); @@ -72,26 +72,26 @@ void set_dense_vector_data(sycl::queue & /*queue*/, std::int64_t size, sycl::buffer val); \ template void init_dense_vector( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE * val); \ + 
std::int64_t size, FP_TYPE* val); \ template void set_dense_vector_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val); \ template void set_dense_vector_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE * val) + std::int64_t size, FP_TYPE* val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_VECTOR_FUNCS); #undef INSTANTIATE_DENSE_VECTOR_FUNCS -sycl::event release_dense_vector(sycl::queue &queue, +sycl::event release_dense_vector(sycl::queue& queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, - const std::vector &dependencies) { + const std::vector& dependencies) { return detail::submit_release(queue, dvhandle, dependencies); } // Dense matrix template -void init_dense_matrix(sycl::queue & /*queue*/, - oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, +void init_dense_matrix(sycl::queue& /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, sycl::buffer val) { *p_dmhandle = @@ -99,16 +99,16 @@ void init_dense_matrix(sycl::queue & /*queue*/, } template -void init_dense_matrix(sycl::queue & /*queue*/, - oneapi::mkl::sparse::dense_matrix_handle_t *p_dmhandle, +void init_dense_matrix(sycl::queue& /*queue*/, + oneapi::mkl::sparse::dense_matrix_handle_t* p_dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - oneapi::mkl::layout dense_layout, fpType *val) { + oneapi::mkl::layout dense_layout, fpType* val) { *p_dmhandle = new oneapi::mkl::sparse::dense_matrix_handle(val, num_rows, num_cols, ld, dense_layout); } template -void set_dense_matrix_data(sycl::queue & /*queue*/, +void set_dense_matrix_data(sycl::queue& /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, oneapi::mkl::layout dense_layout, sycl::buffer val) { @@ -121,10 +121,10 
@@ void set_dense_matrix_data(sycl::queue & /*queue*/, } template -void set_dense_matrix_data(sycl::queue & /*queue*/, +void set_dense_matrix_data(sycl::queue& /*queue*/, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, - oneapi::mkl::layout dense_layout, fpType *val) { + oneapi::mkl::layout dense_layout, fpType* val) { check_can_reset_value_handle(__func__, dmhandle, false); dmhandle->num_rows = num_rows; dmhandle->num_cols = num_cols; @@ -141,7 +141,7 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, template void init_dense_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE * val); \ + oneapi::mkl::layout dense_layout, FP_TYPE* val); \ template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ @@ -149,19 +149,19 @@ void set_dense_matrix_data(sycl::queue & /*queue*/, template void set_dense_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE * val) + oneapi::mkl::layout dense_layout, FP_TYPE* val) FOR_EACH_FP_TYPE(INSTANTIATE_DENSE_MATRIX_FUNCS); #undef INSTANTIATE_DENSE_MATRIX_FUNCS -sycl::event release_dense_matrix(sycl::queue &queue, +sycl::event release_dense_matrix(sycl::queue& queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, - const std::vector &dependencies) { + const std::vector& dependencies) { return detail::submit_release(queue, dmhandle, dependencies); } // COO matrix template -void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, +void init_coo_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle, 
std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { @@ -178,10 +178,10 @@ void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p } template -void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, +void init_coo_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, - fpType *val) { + oneapi::mkl::index_base index, intType* row_ind, intType* col_ind, + fpType* val) { oneapi::mkl::sparse::matrix_handle_t mkl_handle; oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ind, col_ind, val); @@ -193,8 +193,8 @@ void init_coo_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p } template -void check_can_reset_sparse_handle(const std::string &function_name, - detail::sparse_matrix_handle *internal_smhandle, +void check_can_reset_sparse_handle(const std::string& function_name, + detail::sparse_matrix_handle* internal_smhandle, bool expect_buffer) { check_can_reset_value_handle(function_name, internal_smhandle, expect_buffer); if (internal_smhandle->get_int_type() != detail::get_data_type()) { @@ -212,7 +212,7 @@ void check_can_reset_sparse_handle(const std::string &function_name, } template -void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, +void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, sycl::buffer row_ind, sycl::buffer col_ind, sycl::buffer val) { @@ -231,10 +231,10 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ } template -void 
set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, +void set_coo_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, - oneapi::mkl::index_base index, intType *row_ind, intType *col_ind, - fpType *val) { + oneapi::mkl::index_base index, intType* row_ind, intType* col_ind, + fpType* val) { auto internal_smhandle = detail::get_internal_handle(smhandle); check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ind); @@ -255,7 +255,7 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ template void init_coo_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ + oneapi::mkl::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \ template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -263,14 +263,14 @@ void set_coo_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ sycl::buffer val); \ template void set_coo_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ - INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ind, \ + INT_TYPE* col_ind, FP_TYPE* val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_COO_MATRIX_FUNCS); #undef INSTANTIATE_COO_MATRIX_FUNCS // CSR matrix template -void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, +void 
init_csr_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, oneapi::mkl::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { @@ -288,10 +288,10 @@ void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p } template -void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p_smhandle, +void init_csr_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, - oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, - fpType *val) { + oneapi::mkl::index_base index, intType* row_ptr, intType* col_ind, + fpType* val) { oneapi::mkl::sparse::matrix_handle_t mkl_handle; oneapi::mkl::sparse::init_matrix_handle(&mkl_handle); auto internal_smhandle = new detail::sparse_matrix_handle(mkl_handle, row_ptr, col_ind, val); @@ -304,7 +304,7 @@ void init_csr_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t *p } template -void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, +void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, oneapi::mkl::index_base index, sycl::buffer row_ptr, sycl::buffer col_ind, sycl::buffer val) { @@ -324,10 +324,10 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ } template -void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, +void set_csr_matrix_data(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t /*nnz*/, - oneapi::mkl::index_base index, intType *row_ptr, intType *col_ind, - fpType *val) { + oneapi::mkl::index_base index, intType* row_ptr, intType* col_ind, + fpType* val) { auto 
internal_smhandle = detail::get_internal_handle(smhandle); check_can_reset_sparse_handle(__func__, internal_smhandle, false); internal_smhandle->row_container.set_usm_ptr(row_ptr); @@ -349,7 +349,7 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ template void init_csr_matrix( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ + oneapi::mkl::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \ template void set_csr_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -357,14 +357,14 @@ void set_csr_matrix_data(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_ sycl::buffer val); \ template void set_csr_matrix_data( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ - INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ptr, \ + INT_TYPE* col_ind, FP_TYPE* val) FOR_EACH_FP_AND_INT_TYPE(INSTANTIATE_CSR_MATRIX_FUNCS); #undef INSTANTIATE_CSR_MATRIX_FUNCS // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, - const std::vector &dependencies) { +sycl::event release_sparse_matrix(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + const std::vector& dependencies) { auto internal_smhandle = detail::get_internal_handle(smhandle); // Asynchronously release the backend's handle followed by the internal handle. 
auto event = oneapi::mkl::sparse::release_matrix_handle( @@ -372,7 +372,7 @@ sycl::event release_sparse_matrix(sycl::queue &queue, oneapi::mkl::sparse::matri return detail::submit_release(queue, internal_smhandle, event); } -bool set_matrix_property(sycl::queue & /*queue*/, oneapi::mkl::sparse::matrix_handle_t smhandle, +bool set_matrix_property(sycl::queue& /*queue*/, oneapi::mkl::sparse::matrix_handle_t smhandle, oneapi::mkl::sparse::matrix_property property) { auto internal_smhandle = detail::get_internal_handle(smhandle); // Store the matrix property internally for better error checking diff --git a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp index ca15c5b4f..99dc6707d 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_helper.hpp +++ b/src/sparse_blas/backends/mkl_common/mkl_helper.hpp @@ -36,15 +36,15 @@ namespace oneapi::mkl::sparse::detail { /// Return whether a pointer is accessible on the host template -inline bool is_ptr_accessible_on_host(sycl::queue &queue, const T *host_or_device_ptr) { +inline bool is_ptr_accessible_on_host(sycl::queue& queue, const T* host_or_device_ptr) { auto alloc_type = sycl::get_pointer_type(host_or_device_ptr, queue.get_context()); return alloc_type == sycl::usm::alloc::host || alloc_type == sycl::usm::alloc::shared || alloc_type == sycl::usm::alloc::unknown; } /// Throw an exception if the scalar is not accessible in the host -inline void check_ptr_is_host_accessible(const std::string &function_name, - const std::string &scalar_name, +inline void check_ptr_is_host_accessible(const std::string& function_name, + const std::string& scalar_name, bool is_ptr_accessible_on_host) { if (!is_ptr_accessible_on_host) { throw mkl::invalid_argument( @@ -56,7 +56,7 @@ inline void check_ptr_is_host_accessible(const std::string &function_name, /// Return a scalar on the host from a pointer to host or device memory /// Used for USM functions template -inline T 
get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, +inline T get_scalar_on_host(sycl::queue& queue, const T* host_or_device_ptr, bool is_ptr_accessible_on_host) { if (is_ptr_accessible_on_host) { return *host_or_device_ptr; @@ -68,8 +68,8 @@ inline T get_scalar_on_host(sycl::queue &queue, const T *host_or_device_ptr, } /// Merge multiple event dependencies into one -inline sycl::event collapse_dependencies(sycl::queue &queue, - const std::vector &dependencies) { +inline sycl::event collapse_dependencies(sycl::queue& queue, + const std::vector& dependencies) { if (dependencies.empty()) { return {}; } @@ -77,7 +77,7 @@ inline sycl::event collapse_dependencies(sycl::queue &queue, return dependencies[0]; } - return queue.submit([&](sycl::handler &cgh) { + return queue.submit([&](sycl::handler& cgh) { cgh.depends_on(dependencies); cgh.host_task([=]() {}); }); diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx index dad611252..49987a202 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmm.cxx @@ -35,16 +35,16 @@ struct spmm_descr { namespace oneapi::mkl::sparse::BACKEND { -void init_spmm_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr) { +void init_spmm_descr(sycl::queue& /*queue*/, oneapi::mkl::sparse::spmm_descr_t* p_spmm_descr) { *p_spmm_descr = new spmm_descr(); } -sycl::event release_spmm_descr(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies) { +sycl::event release_spmm_descr(sycl::queue& queue, oneapi::mkl::sparse::spmm_descr_t spmm_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spmm_descr, dependencies); } -void check_valid_spmm(const std::string &function_name, oneapi::mkl::transpose opA, +void check_valid_spmm(const std::string& function_name, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, 
oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, @@ -95,14 +95,14 @@ void check_valid_spmm(const std::string &function_name, oneapi::mkl::transpose o #endif // BACKEND } -void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose /*opB*/, const void *alpha, +void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose /*opB*/, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg /*alg*/, - oneapi::mkl::sparse::spmm_descr_t spmm_descr, std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spmm_descr_t spmm_descr, std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); @@ -113,9 +113,9 @@ void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, } inline void common_spmm_optimize( - sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); @@ -136,10 +136,10 @@ inline void common_spmm_optimize( spmm_descr->last_optimized_alg = alg; } -void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, +void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer /*workspace*/) { @@ -156,15 +156,15 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: // TODO: Add support for spmm_optimize once the close-source oneMKL backend supports it. 
} -sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, - oneapi::mkl::sparse::spmm_descr_t spmm_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void* /*workspace*/, + const std::vector& dependencies) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -181,16 +181,16 @@ sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, template sycl::event internal_spmm( - sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void *alpha, + sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, const void* alpha, oneapi::mkl::sparse::matrix_view /*A_view*/, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg /*alg*/, - oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, const std::vector &dependencies, + oneapi::mkl::sparse::spmm_descr_t /*spmm_descr*/, const std::vector& dependencies, bool is_alpha_host_accessible, bool is_beta_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), 
is_alpha_host_accessible); T host_beta = - detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto layout = B_handle->dense_layout; @@ -212,13 +212,13 @@ sycl::event internal_spmm( } } -sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, +sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmm(__func__, opA, A_view, A_handle, B_handle, C_handle, is_alpha_host_accessible, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx index d2332286b..d5a24e9f4 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spmv.cxx @@ -34,16 +34,16 @@ struct spmv_descr { namespace oneapi::mkl::sparse::BACKEND { -void init_spmv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr) { +void init_spmv_descr(sycl::queue& /*queue*/, oneapi::mkl::sparse::spmv_descr_t* p_spmv_descr) { *p_spmv_descr = new spmv_descr(); } -sycl::event release_spmv_descr(sycl::queue &queue, 
oneapi::mkl::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies) { +sycl::event release_spmv_descr(sycl::queue& queue, oneapi::mkl::sparse::spmv_descr_t spmv_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spmv_descr, dependencies); } -void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose opA, +void check_valid_spmv(const std::string& function_name, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -85,13 +85,13 @@ void check_valid_spmv(const std::string &function_name, oneapi::mkl::transpose o } } -void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg /*alg*/, - oneapi::mkl::sparse::spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. 
bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); @@ -101,11 +101,11 @@ void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void spmv_descr->buffer_size_called = true; } -inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +inline void common_spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, - const void *beta, + const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr) { @@ -126,10 +126,10 @@ inline void common_spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, spmv_descr->last_optimized_alg = alg; } -void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer /*workspace*/) { @@ -157,14 +157,14 @@ void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a } } -sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + 
oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, - oneapi::mkl::sparse::spmv_descr_t spmv_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void* /*workspace*/, + const std::vector& dependencies) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -190,19 +190,19 @@ sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const } template -sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event internal_spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg /*alg*/, oneapi::mkl::sparse::spmv_descr_t /*spmv_descr*/, - const std::vector &dependencies, + const std::vector& dependencies, bool is_alpha_host_accessible, bool is_beta_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); T host_beta = - detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); + detail::get_scalar_on_host(queue, static_cast(beta), is_beta_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; auto backend_handle = internal_A_handle->backend_handle; @@ -245,13 +245,13 @@ sycl::event internal_spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const } } -sycl::event spmv(sycl::queue &queue, 
oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); bool is_beta_host_accessible = detail::is_ptr_accessible_on_host(queue, beta); check_valid_spmv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, diff --git a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx index 7ef5b3c39..f73000340 100644 --- a/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx +++ b/src/sparse_blas/backends/mkl_common/mkl_spsv.cxx @@ -34,16 +34,16 @@ struct spsv_descr { namespace oneapi::mkl::sparse::BACKEND { -void init_spsv_descr(sycl::queue & /*queue*/, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr) { +void init_spsv_descr(sycl::queue& /*queue*/, oneapi::mkl::sparse::spsv_descr_t* p_spsv_descr) { *p_spsv_descr = new spsv_descr(); } -sycl::event release_spsv_descr(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies) { +sycl::event release_spsv_descr(sycl::queue& queue, oneapi::mkl::sparse::spsv_descr_t spsv_descr, + const std::vector& dependencies) { return detail::submit_release(queue, spsv_descr, dependencies); } -void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose opA, +void check_valid_spsv(const std::string& function_name, oneapi::mkl::transpose opA, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, 
oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -84,13 +84,13 @@ void check_valid_spsv(const std::string &function_name, oneapi::mkl::transpose o } } -void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t spsv_descr, std::size_t &temp_buffer_size) { + oneapi::mkl::sparse::spsv_descr_t spsv_descr, std::size_t& temp_buffer_size) { // TODO: Add support for external workspace once the close-source oneMKL backend supports it. bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, @@ -99,7 +99,7 @@ void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void spsv_descr->buffer_size_called = true; } -inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +inline void common_spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -122,7 +122,7 @@ inline void common_spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, spsv_descr->last_optimized_alg = alg; } -void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -142,14 +142,14 @@ void spsv_optimize(sycl::queue &queue, 
oneapi::mkl::transpose opA, const void *a internal_A_handle->backend_handle); } -sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t spsv_descr, void * /*workspace*/, - const std::vector &dependencies) { + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void* /*workspace*/, + const std::vector& dependencies) { auto internal_A_handle = detail::get_internal_handle(A_handle); if (internal_A_handle->all_use_buffer()) { detail::throw_incompatible_container(__func__); @@ -164,17 +164,17 @@ sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const } template -sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event internal_spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg /*alg*/, oneapi::mkl::sparse::spsv_descr_t /*spsv_descr*/, - const std::vector &dependencies, + const std::vector& dependencies, bool is_alpha_host_accessible) { T host_alpha = - detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); + detail::get_scalar_on_host(queue, static_cast(alpha), is_alpha_host_accessible); auto internal_A_handle = detail::get_internal_handle(A_handle); internal_A_handle->can_be_reset = false; if (internal_A_handle->all_use_buffer()) { @@ -192,13 +192,13 @@ sycl::event internal_spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const } } -sycl::event 
spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { bool is_alpha_host_accessible = detail::is_ptr_accessible_on_host(queue, alpha); check_valid_spsv(__func__, opA, A_view, A_handle, x_handle, y_handle, is_alpha_host_accessible, alg); diff --git a/src/sparse_blas/function_table.hpp b/src/sparse_blas/function_table.hpp index d1e3d8189..429468ca1 100644 --- a/src/sparse_blas/function_table.hpp +++ b/src/sparse_blas/function_table.hpp @@ -30,13 +30,13 @@ std::int64_t size, sycl::buffer val); \ void (*init_dense_vector_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t * p_dvhandle, \ - std::int64_t size, FP_TYPE * val); \ + std::int64_t size, FP_TYPE* val); \ void (*set_dense_vector_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val); \ void (*set_dense_vector_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE * val) + std::int64_t size, FP_TYPE* val) // Dense matrix #define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ @@ -47,7 +47,7 @@ void (*init_dense_matrix_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t * p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE * val); \ + oneapi::mkl::layout dense_layout, FP_TYPE* val); \ void (*set_dense_matrix_data_buffer##FP_SUFFIX)( \ sycl::queue & queue, 
oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ @@ -55,7 +55,7 @@ void (*set_dense_matrix_data_usm##FP_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - oneapi::mkl::layout dense_layout, FP_TYPE * val) + oneapi::mkl::layout dense_layout, FP_TYPE* val) // COO matrix #define DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ @@ -67,7 +67,7 @@ void (*init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val); \ + oneapi::mkl::index_base index, INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val); \ void (*set_coo_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -75,8 +75,8 @@ sycl::buffer val); \ void (*set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ - INT_TYPE * row_ind, INT_TYPE * col_ind, FP_TYPE * val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ind, \ + INT_TYPE* col_ind, FP_TYPE* val) // CSR matrix #define DEFINE_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ @@ -88,7 +88,7 @@ void (*init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t * p_smhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, \ - oneapi::mkl::index_base index, INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val); \ + 
oneapi::mkl::index_base index, INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val); \ void (*set_csr_matrix_data_buffer##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ @@ -96,23 +96,23 @@ sycl::buffer val); \ void (*set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX)( \ sycl::queue & queue, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, \ - std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, \ - INT_TYPE * row_ptr, INT_TYPE * col_ind, FP_TYPE * val) + std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, INT_TYPE* row_ptr, \ + INT_TYPE* col_ind, FP_TYPE* val) typedef struct { int version; // Dense vector FOR_EACH_FP_TYPE(DEFINE_DENSE_VECTOR_FUNCS); - sycl::event (*release_dense_vector)(sycl::queue &queue, + sycl::event (*release_dense_vector)(sycl::queue& queue, oneapi::mkl::sparse::dense_vector_handle_t dvhandle, - const std::vector &dependencies); + const std::vector& dependencies); // Dense matrix FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); - sycl::event (*release_dense_matrix)(sycl::queue &queue, + sycl::event (*release_dense_matrix)(sycl::queue& queue, oneapi::mkl::sparse::dense_matrix_handle_t dmhandle, - const std::vector &dependencies); + const std::vector& dependencies); // COO matrix FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); @@ -121,117 +121,117 @@ typedef struct { FOR_EACH_FP_AND_INT_TYPE(DEFINE_CSR_MATRIX_FUNCS); // Common sparse matrix functions - sycl::event (*release_sparse_matrix)(sycl::queue &queue, + sycl::event (*release_sparse_matrix)(sycl::queue& queue, oneapi::mkl::sparse::matrix_handle_t smhandle, - const std::vector &dependencies); + const std::vector& dependencies); - bool (*set_matrix_property)(sycl::queue &queue, oneapi::mkl::sparse::matrix_handle_t smhandle, + bool (*set_matrix_property)(sycl::queue& queue, 
oneapi::mkl::sparse::matrix_handle_t smhandle, oneapi::mkl::sparse::matrix_property property); // SPMM - void (*init_spmm_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmm_descr_t *p_spmm_descr); + void (*init_spmm_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmm_descr_t* p_spmm_descr); - sycl::event (*release_spmm_descr)(sycl::queue &queue, + sycl::event (*release_spmm_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spmm_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, + void (*spmm_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); void (*spmm_optimize_buffer)( - sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, sycl::buffer workspace); - sycl::event (*spmm_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void 
*alpha, + sycl::event (*spmm_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_matrix_handle_t B_handle, - const void *beta, + const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, - oneapi::mkl::sparse::spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies); + oneapi::mkl::sparse::spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spmm)(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + sycl::event (*spmm)(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void *beta, + oneapi::mkl::sparse::dense_matrix_handle_t B_handle, const void* beta, oneapi::mkl::sparse::dense_matrix_handle_t C_handle, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::spmm_descr_t spmm_descr, - const std::vector &dependencies); + const std::vector& dependencies); // SPMV - void (*init_spmv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spmv_descr_t *p_spmv_descr); + void (*init_spmv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmv_descr_t* p_spmv_descr); - sycl::event (*release_spmv_descr)(sycl::queue &queue, + sycl::event (*release_spmv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spmv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + void (*spmv_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, 
oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); void (*spmv_optimize_buffer)( - sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, sycl::buffer workspace); - sycl::event (*spmv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + sycl::event (*spmv_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, - const void *beta, + const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, - oneapi::mkl::sparse::spmv_descr_t spmv_descr, void *workspace, - const std::vector &dependencies); + oneapi::mkl::sparse::spmv_descr_t spmv_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spmv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + sycl::event (*spmv)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, - oneapi::mkl::sparse::dense_vector_handle_t x_handle, 
const void *beta, + oneapi::mkl::sparse::dense_vector_handle_t x_handle, const void* beta, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::spmv_descr_t spmv_descr, - const std::vector &dependencies); + const std::vector& dependencies); // SPSV - void (*init_spsv_descr)(sycl::queue &queue, oneapi::mkl::sparse::spsv_descr_t *p_spsv_descr); + void (*init_spsv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spsv_descr_t* p_spsv_descr); - sycl::event (*release_spsv_descr)(sycl::queue &queue, + sycl::event (*release_spsv_descr)(sycl::queue& queue, oneapi::mkl::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies); + const std::vector& dependencies); - void (*spsv_buffer_size)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + void (*spsv_buffer_size)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size); + std::size_t& temp_buffer_size); - void (*spsv_optimize_buffer)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + void (*spsv_optimize_buffer)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, @@ -240,23 +240,23 @@ typedef struct { oneapi::mkl::sparse::spsv_descr_t spsv_descr, sycl::buffer workspace); - sycl::event (*spsv_optimize_usm)(sycl::queue &queue, oneapi::mkl::transpose opA, - const void *alpha, oneapi::mkl::sparse::matrix_view A_view, + sycl::event (*spsv_optimize_usm)(sycl::queue& queue, oneapi::mkl::transpose opA, + const void* alpha, oneapi::mkl::sparse::matrix_view A_view, 
oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, - oneapi::mkl::sparse::spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies); + oneapi::mkl::sparse::spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies); - sycl::event (*spsv)(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, + sycl::event (*spsv)(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, oneapi::mkl::sparse::matrix_view A_view, oneapi::mkl::sparse::matrix_handle_t A_handle, oneapi::mkl::sparse::dense_vector_handle_t x_handle, oneapi::mkl::sparse::dense_vector_handle_t y_handle, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::spsv_descr_t spsv_descr, - const std::vector &dependencies); + const std::vector& dependencies); } sparse_blas_function_table_t; #undef DEFINE_DENSE_VECTOR_FUNCS diff --git a/src/sparse_blas/sparse_blas_loader.cpp b/src/sparse_blas/sparse_blas_loader.cpp index 4304479d1..f236b4626 100644 --- a/src/sparse_blas/sparse_blas_loader.cpp +++ b/src/sparse_blas/sparse_blas_loader.cpp @@ -33,29 +33,29 @@ static oneapi::mkl::detail::table_initializer \ - void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ + void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, \ std::int64_t size, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_vector_buffer##FP_SUFFIX(queue, p_dvhandle, \ size, val); \ } \ template <> \ - void init_dense_vector(sycl::queue &queue, dense_vector_handle_t *p_dvhandle, \ - std::int64_t size, FP_TYPE *val) { \ + void init_dense_vector(sycl::queue& queue, dense_vector_handle_t* p_dvhandle, \ + std::int64_t size, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_vector_usm##FP_SUFFIX(queue, p_dvhandle, 
\ size, val); \ } \ template <> \ - void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ + void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, \ std::int64_t size, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_vector_data_buffer##FP_SUFFIX( \ queue, dvhandle, size, val); \ } \ template <> \ - void set_dense_vector_data(sycl::queue &queue, dense_vector_handle_t dvhandle, \ - std::int64_t size, FP_TYPE *val) { \ + void set_dense_vector_data(sycl::queue& queue, dense_vector_handle_t dvhandle, \ + std::int64_t size, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_vector_data_usm##FP_SUFFIX(queue, dvhandle, \ size, val); \ @@ -63,8 +63,8 @@ static oneapi::mkl::detail::table_initializer &dependencies) { +sycl::event release_dense_vector(sycl::queue& queue, dense_vector_handle_t dvhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_dense_vector(queue, dvhandle, dependencies); } @@ -72,7 +72,7 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan // Dense matrix #define DEFINE_DENSE_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX) \ template <> \ - void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ layout dense_layout, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ @@ -80,15 +80,15 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan queue, p_dmhandle, num_rows, num_cols, ld, dense_layout, val); \ } \ template <> \ - void init_dense_matrix(sycl::queue &queue, dense_matrix_handle_t *p_dmhandle, \ + void init_dense_matrix(sycl::queue& queue, dense_matrix_handle_t* p_dmhandle, \ std::int64_t num_rows, 
std::int64_t num_cols, std::int64_t ld, \ - layout dense_layout, FP_TYPE *val) { \ + layout dense_layout, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_dense_matrix_usm##FP_SUFFIX( \ queue, p_dmhandle, num_rows, num_cols, ld, dense_layout, val); \ } \ template <> \ - void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ layout dense_layout, sycl::buffer val) { \ auto libkey = get_device_id(queue); \ @@ -96,9 +96,9 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ } \ template <> \ - void set_dense_matrix_data(sycl::queue &queue, dense_matrix_handle_t dmhandle, \ + void set_dense_matrix_data(sycl::queue& queue, dense_matrix_handle_t dmhandle, \ std::int64_t num_rows, std::int64_t num_cols, std::int64_t ld, \ - layout dense_layout, FP_TYPE *val) { \ + layout dense_layout, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_dense_matrix_data_usm##FP_SUFFIX( \ queue, dmhandle, num_rows, num_cols, ld, dense_layout, val); \ @@ -106,8 +106,8 @@ sycl::event release_dense_vector(sycl::queue &queue, dense_vector_handle_t dvhan FOR_EACH_FP_TYPE(DEFINE_DENSE_MATRIX_FUNCS); #undef DEFINE_DENSE_MATRIX_FUNCS -sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhandle, - const std::vector &dependencies) { +sycl::event release_dense_matrix(sycl::queue& queue, dense_matrix_handle_t dmhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_dense_matrix(queue, dmhandle, dependencies); } @@ -115,7 +115,7 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan // COO matrix #define 
DEFINE_COO_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ template <> \ - void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ind, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -124,15 +124,15 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void init_coo_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_coo_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ind, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_coo_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ queue, p_smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ind, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -141,9 +141,9 @@ sycl::event release_dense_matrix(sycl::queue &queue, dense_matrix_handle_t dmhan queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ } \ template <> \ - void set_coo_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_coo_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ind, INT_TYPE 
*col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ind, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_coo_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ queue, smhandle, num_rows, num_cols, nnz, index, row_ind, col_ind, val); \ @@ -154,7 +154,7 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); // CSR matrix #define DEFINE_INIT_CSR_MATRIX_FUNCS(FP_TYPE, FP_SUFFIX, INT_TYPE, INT_SUFFIX) \ template <> \ - void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ptr, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -163,15 +163,15 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ } \ template <> \ - void init_csr_matrix(sycl::queue &queue, matrix_handle_t *p_smhandle, std::int64_t num_rows, \ + void init_csr_matrix(sycl::queue& queue, matrix_handle_t* p_smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].init_csr_matrix_usm##FP_SUFFIX##INT_SUFFIX( \ queue, p_smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ } \ template <> \ - void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ sycl::buffer row_ptr, sycl::buffer col_ind, \ sycl::buffer val) { \ @@ -180,9 +180,9 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_COO_MATRIX_FUNCS); queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, 
col_ind, val); \ } \ template <> \ - void set_csr_matrix_data(sycl::queue &queue, matrix_handle_t smhandle, std::int64_t num_rows, \ + void set_csr_matrix_data(sycl::queue& queue, matrix_handle_t smhandle, std::int64_t num_rows, \ std::int64_t num_cols, std::int64_t nnz, index_base index, \ - INT_TYPE *row_ptr, INT_TYPE *col_ind, FP_TYPE *val) { \ + INT_TYPE* row_ptr, INT_TYPE* col_ind, FP_TYPE* val) { \ auto libkey = get_device_id(queue); \ function_tables[{ libkey, queue }].set_csr_matrix_data_usm##FP_SUFFIX##INT_SUFFIX( \ queue, smhandle, num_rows, num_cols, nnz, index, row_ptr, col_ind, val); \ @@ -191,43 +191,43 @@ FOR_EACH_FP_AND_INT_TYPE(DEFINE_INIT_CSR_MATRIX_FUNCS); #undef DEFINE_INIT_CSR_MATRIX_FUNCS // Common sparse matrix functions -sycl::event release_sparse_matrix(sycl::queue &queue, matrix_handle_t smhandle, - const std::vector &dependencies) { +sycl::event release_sparse_matrix(sycl::queue& queue, matrix_handle_t smhandle, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_sparse_matrix(queue, smhandle, dependencies); } -bool set_matrix_property(sycl::queue &queue, matrix_handle_t smhandle, matrix_property property) { +bool set_matrix_property(sycl::queue& queue, matrix_handle_t smhandle, matrix_property property) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].set_matrix_property(queue, smhandle, property); } // SPMM -void init_spmm_descr(sycl::queue &queue, spmm_descr_t *p_spmm_descr) { +void init_spmm_descr(sycl::queue& queue, spmm_descr_t* p_spmm_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spmm_descr(queue, p_spmm_descr); } -sycl::event release_spmm_descr(sycl::queue &queue, spmm_descr_t spmm_descr, - const std::vector &dependencies) { +sycl::event release_spmm_descr(sycl::queue& queue, spmm_descr_t spmm_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return 
function_tables[{ libkey, queue }].release_spmm_descr(queue, spmm_descr, dependencies); } -void spmm_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, +void spmm_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmm_buffer_size(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, temp_buffer_size); } -void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +void spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, sycl::buffer workspace) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmm_optimize_buffer(queue, opA, opB, alpha, A_view, @@ -235,23 +235,23 @@ void spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl:: spmm_descr, workspace); } -sycl::event spmm_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, - oneapi::mkl::transpose opB, const void *alpha, matrix_view A_view, +sycl::event spmm_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, + oneapi::mkl::transpose opB, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, 
dense_matrix_handle_t B_handle, - const void *beta, dense_matrix_handle_t C_handle, spmm_alg alg, - spmm_descr_t spmm_descr, void *workspace, - const std::vector &dependencies) { + const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, + spmm_descr_t spmm_descr, void* workspace, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmm_optimize_usm( queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, workspace, dependencies); } -sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, - const void *alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_matrix_handle_t B_handle, const void *beta, dense_matrix_handle_t C_handle, +sycl::event spmm(sycl::queue& queue, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, + const void* alpha, matrix_view A_view, matrix_handle_t A_handle, + dense_matrix_handle_t B_handle, const void* beta, dense_matrix_handle_t C_handle, spmm_alg alg, spmm_descr_t spmm_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmm(queue, opA, opB, alpha, A_view, A_handle, B_handle, beta, C_handle, alg, spmm_descr, @@ -259,78 +259,78 @@ sycl::event spmm(sycl::queue &queue, oneapi::mkl::transpose opA, oneapi::mkl::tr } // SPMV -void init_spmv_descr(sycl::queue &queue, spmv_descr_t *p_spmv_descr) { +void init_spmv_descr(sycl::queue& queue, spmv_descr_t* p_spmv_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spmv_descr(queue, p_spmv_descr); } -sycl::event release_spmv_descr(sycl::queue &queue, spmv_descr_t spmv_descr, - const std::vector &dependencies) { +sycl::event release_spmv_descr(sycl::queue& queue, spmv_descr_t spmv_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue 
}].release_spmv_descr(queue, spmv_descr, dependencies); } -void spmv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, std::size_t &temp_buffer_size) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, std::size_t& temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmv_buffer_size(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, temp_buffer_size); } -void spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, sycl::buffer workspace) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spmv_optimize_buffer( queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace); } -sycl::event spmv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, - dense_vector_handle_t x_handle, const void *beta, + dense_vector_handle_t x_handle, const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, spmv_descr_t spmv_descr, - void *workspace, const std::vector &dependencies) { + void* workspace, const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmv_optimize_usm( queue, 
opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, workspace, dependencies); } -sycl::event spmv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spmv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, - const void *beta, dense_vector_handle_t y_handle, spmv_alg alg, - spmv_descr_t spmv_descr, const std::vector &dependencies) { + const void* beta, dense_vector_handle_t y_handle, spmv_alg alg, + spmv_descr_t spmv_descr, const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spmv(queue, opA, alpha, A_view, A_handle, x_handle, beta, y_handle, alg, spmv_descr, dependencies); } // SPSV -void init_spsv_descr(sycl::queue &queue, spsv_descr_t *p_spsv_descr) { +void init_spsv_descr(sycl::queue& queue, spsv_descr_t* p_spsv_descr) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].init_spsv_descr(queue, p_spsv_descr); } -sycl::event release_spsv_descr(sycl::queue &queue, spsv_descr_t spsv_descr, - const std::vector &dependencies) { +sycl::event release_spsv_descr(sycl::queue& queue, spsv_descr_t spsv_descr, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].release_spsv_descr(queue, spsv_descr, dependencies); } -void spsv_buffer_size(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_buffer_size(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - std::size_t &temp_buffer_size) { + std::size_t& temp_buffer_size) { auto libkey = get_device_id(queue); function_tables[{ libkey, queue }].spsv_buffer_size( queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, temp_buffer_size); } -void 
spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +void spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, sycl::buffer workspace) { @@ -339,21 +339,21 @@ void spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *a queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace); } -sycl::event spsv_optimize(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv_optimize(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, - spsv_alg alg, spsv_descr_t spsv_descr, void *workspace, - const std::vector &dependencies) { + spsv_alg alg, spsv_descr_t spsv_descr, void* workspace, + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spsv_optimize_usm(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, workspace, dependencies); } -sycl::event spsv(sycl::queue &queue, oneapi::mkl::transpose opA, const void *alpha, +sycl::event spsv(sycl::queue& queue, oneapi::mkl::transpose opA, const void* alpha, matrix_view A_view, matrix_handle_t A_handle, dense_vector_handle_t x_handle, dense_vector_handle_t y_handle, spsv_alg alg, spsv_descr_t spsv_descr, - const std::vector &dependencies) { + const std::vector& dependencies) { auto libkey = get_device_id(queue); return function_tables[{ libkey, queue }].spsv(queue, opA, alpha, A_view, A_handle, x_handle, y_handle, alg, spsv_descr, dependencies); diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp index 9bb1406ef..e311237a1 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_stride.cpp +++ 
b/tests/unit_tests/blas/batch/axpy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha, int64_t batch_size) { // Prepare data. int64_t n, i; @@ -77,19 +77,19 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::axpy(&n_ref, (fp_ref *)&alpha, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::axpy(&n_ref, (fp_ref*)&alpha, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ AXPY_BATCH_STRIDE. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -133,17 +133,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -160,7 +160,7 @@ int test(device *dev, oneapi::mkl::layout layout, 
int64_t incx, int64_t incy, fp } class AxpyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchStrideTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp index 9ebc82abe..36f260e10 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/axpy_batch_stride_usm.cpp @@ -43,20 +43,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp alpha, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -101,8 +101,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::axpy(&n_ref, (fp_ref *)&alpha, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::axpy(&n_ref, (fp_ref*)&alpha, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ AXPY_BATCH_STRIDE. 
@@ -140,17 +140,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -166,7 +166,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, fp } class AxpyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchStrideUsmTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp index 4dacf8ddb..a65367eb0 100644 --- a/tests/unit_tests/blas/batch/axpy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/axpy_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -69,15 +69,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { std::vector dependencies; // Prepare data. 
- int64_t *n = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incx = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incy = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - fp *alpha = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt); - int64_t *group_size = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* n = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incx = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incy = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + fp* alpha = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * group_count, *dev, cxt); + int64_t* group_size = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); if ((n == NULL) || (incx == NULL) || (incy == NULL) || (alpha == NULL) || (group_size == NULL)) { @@ -104,12 +103,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - fp **x_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_ref_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); + fp** x_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_ref_array = + (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ 
-124,11 +121,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_size_x = (1 + (n[i] - 1) * std::abs(incx[i])); total_size_y = (1 + (n[i] - 1) * std::abs(incy[i])); x_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); y_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); y_ref_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); rand_vector(x_array[idx], n[i], incx[i]); rand_vector(y_array[idx], n[i], incy[i]); copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]); @@ -146,8 +143,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { n_ref = (int)n[i]; incx_ref = (int)incx[i]; incy_ref = (int)incy[i]; - ::axpy((const int *)&n_ref, (const fp_ref *)&alpha[i], (const fp_ref *)x_array[idx], - (const int *)&incx_ref, (fp_ref *)y_ref_array[idx], (const int *)&incy_ref); + ::axpy((const int*)&n_ref, (const fp_ref*)&alpha[i], (const fp_ref*)x_array[idx], + (const int*)&incx_ref, (fp_ref*)y_ref_array[idx], (const int*)&incy_ref); idx++; } } @@ -159,12 +156,12 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::axpy_batch( - main_queue, n, alpha, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::axpy_batch( - main_queue, n, alpha, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, alpha, (const fp**)x_array, incx, y_array, incy, group_count, 
group_size, dependencies); break; default: break; @@ -174,12 +171,12 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::axpy_batch, n, - alpha, (const fp **)x_array, incx, y_array, incy, + alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::mkl::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::axpy_batch, n, - alpha, (const fp **)x_array, incx, y_array, incy, + alpha, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; default: break; @@ -187,13 +184,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -214,7 +211,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY_BATCH:\n" << error.what() << std::endl; } @@ -252,7 +249,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class AxpyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/copy_batch_stride.cpp b/tests/unit_tests/blas/batch/copy_batch_stride.cpp index a1da595f6..ff51e1c6d 100644 --- 
a/tests/unit_tests/blas/batch/copy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Prepare data. int64_t n, i; @@ -76,19 +76,19 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::copy(&n_ref, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::copy(&n_ref, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ COPY_BATCH_STRIDE. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -130,17 +130,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of COPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -157,7 +157,7 @@ int test(device *dev, 
oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } class CopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 15)); diff --git a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp index 569293be1..062054d55 100644 --- a/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -100,8 +100,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::copy(&n_ref, (fp_ref *)x.data() + i * stride_x, &incx_ref, - (fp_ref *)y_ref.data() + i * stride_y, &incy_ref); + ::copy(&n_ref, (fp_ref*)x.data() + i * stride_x, &incx_ref, + (fp_ref*)y_ref.data() + i * stride_y, &incy_ref); } // Call DPC++ COPY_BATCH_STRIDE. 
@@ -139,17 +139,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of COPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -165,7 +165,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } class CopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchStrideUsmTests, RealSinglePrecision) { float alpha = 2.0; diff --git a/tests/unit_tests/blas/batch/copy_batch_usm.cpp b/tests/unit_tests/blas/batch/copy_batch_usm.cpp index 8cac23704..ce051a046 100644 --- a/tests/unit_tests/blas/batch/copy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/copy_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during COPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -69,14 +69,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { std::vector dependencies; // Prepare data. 
- int64_t *n = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incx = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *incy = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); - int64_t *group_size = - (int64_t *)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* n = (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incx = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* incy = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); + int64_t* group_size = + (int64_t*)oneapi::mkl::malloc_shared(64, sizeof(int64_t) * group_count, *dev, cxt); if ((n == NULL) || (incx == NULL) || (incy == NULL) || (group_size == NULL)) { std::cout << "Error cannot allocate input arrays\n"; @@ -100,12 +99,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - fp **x_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); - fp **y_ref_array = - (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * total_batch_count, *dev, cxt); + fp** x_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); + fp** y_ref_array = + (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * total_batch_count, *dev, cxt); if ((x_array == NULL) || (y_array == NULL) || (y_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -120,11 +117,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_size_x = (1 + (n[i] - 1) * std::abs(incx[i])); 
total_size_y = (1 + (n[i] - 1) * std::abs(incy[i])); x_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_x, *dev, cxt); y_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); y_ref_array[idx] = - (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); + (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * total_size_y, *dev, cxt); rand_vector(x_array[idx], n[i], incx[i]); rand_vector(y_array[idx], n[i], incy[i]); copy_vector(y_array[idx], n[i], incy[i], y_ref_array[idx]); @@ -142,8 +139,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { n_ref = (int)n[i]; incx_ref = (int)incx[i]; incy_ref = (int)incy[i]; - ::copy((const int *)&n_ref, (const fp_ref *)x_array[idx], (const int *)&incx_ref, - (fp_ref *)y_ref_array[idx], (const int *)&incy_ref); + ::copy((const int*)&n_ref, (const fp_ref*)x_array[idx], (const int*)&incx_ref, + (fp_ref*)y_ref_array[idx], (const int*)&incy_ref); idx++; } } @@ -155,11 +152,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::copy_batch( - main_queue, n, (const fp **)x_array, incx, y_array, incy, group_count, + main_queue, n, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::mkl::layout::row_major: - done = oneapi::mkl::blas::row_major::copy_batch(main_queue, n, (const fp **)x_array, + done = oneapi::mkl::blas::row_major::copy_batch(main_queue, n, (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; @@ -170,12 +167,12 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::copy_batch, n, - (const fp **)x_array, incx, y_array, incy, group_count, + (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; case oneapi::mkl::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::copy_batch, n, - (const fp **)x_array, incx, y_array, incy, group_count, + (const fp**)x_array, incx, y_array, incy, group_count, group_size, dependencies); break; default: break; @@ -183,13 +180,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during COPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -209,7 +206,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of COPY_BATCH:\n" << error.what() << std::endl; } @@ -246,7 +243,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class CopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(CopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp index bb642c3ee..5e4bd82d8 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector 
devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx, int64_t batch_size) { // Prepare data. int64_t m, n; @@ -90,21 +90,20 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, for (i = 0; i < batch_size_ref; i++) { ::dgmm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), - (const int *)&m_ref, (const int *)&n_ref, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)(x.data() + stride_x * i), - (const int *)&incx_ref, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + (const int*)&m_ref, (const int*)&n_ref, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ DGMM_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -149,17 +148,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -177,7 +176,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, } class DgmmBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp index bb9cf0df3..7b6389b0f 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_stride_usm.cpp @@ -43,20 +43,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, int64_t incx, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -115,10 +115,9 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, for (i = 0; i < batch_size_ref; i++) { ::dgmm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), - (const int *)&m_ref, (const int *)&n_ref, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)(x.data() + stride_x * i), - (const int *)&incx_ref, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + (const int*)&m_ref, (const int*)&n_ref, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ DGMM_BATCH_STRIDE. 
@@ -156,17 +155,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -182,7 +181,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::side left_right, } class DgmmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), diff --git a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp index 1f568580f..87b127358 100644 --- a/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/dgmm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DGMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -102,9 +102,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), x_array(uafpp), c_array(uafpp), - c_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), x_array(uafpp), c_array(uafpp), c_ref_array(uafpp); a_array.resize(total_batch_count); x_array.resize(total_batch_count); c_array.resize(total_batch_count); @@ -117,10 +116,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { size_x = 1 + (x_len - 1) * std::abs(incx[i]); size_c = (layout == oneapi::mkl::layout::col_major) ? 
ldc[i] * n[i] : ldc[i] * m[i]; for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - x_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); - c_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); - c_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + x_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); + c_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + c_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); rand_matrix(a_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], lda[i]); rand_vector(x_array[idx], x_len, incx[i]); rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i]); @@ -132,15 +131,15 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { // Call reference DGMM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *incx_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* incx_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_SIDE *left_right_ref = - (CBLAS_SIDE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); + CBLAS_SIDE* left_right_ref = + (CBLAS_SIDE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) || (ldc_ref == NULL) || (left_right_ref == NULL) || (group_size_ref == NULL)) { @@ -174,10 +173,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { ldc_ref[i] = (int)ldc[i]; group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { - ::dgmm(convert_to_cblas_layout(layout), left_right_ref[i], (const int *)&m_ref[i], - (const int *)&n_ref[i], (const fp_ref *)a_array[idx], (const int *)&lda_ref[i], - (const fp_ref *)x_array[idx], (const int *)&incx_ref[i], - (fp_ref *)c_ref_array[idx], (const int *)&ldc_ref[i]); + ::dgmm(convert_to_cblas_layout(layout), left_right_ref[i], (const int*)&m_ref[i], 
+ (const int*)&n_ref[i], (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], + (const fp_ref*)x_array[idx], (const int*)&incx_ref[i], (fp_ref*)c_ref_array[idx], + (const int*)&ldc_ref[i]); idx++; } } @@ -189,14 +188,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::dgmm_batch( - main_queue, &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], &lda[0], - (const fp **)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, + main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0], + (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::dgmm_batch( - main_queue, &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], &lda[0], - (const fp **)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, + main_queue, &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], &lda[0], + (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -206,14 +205,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::dgmm_batch, - &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &c_array[0], + &left_right[0], &m[0], &n[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::dgmm_batch, - &left_right[0], &m[0], &n[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &c_array[0], + &left_right[0], &m[0], &n[0], (const 
fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -221,13 +220,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DGMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::aligned_free(m_ref); oneapi::mkl::aligned_free(n_ref); oneapi::mkl::aligned_free(lda_ref); @@ -248,7 +247,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DGMM_BATCH:\n" << error.what() << std::endl; } @@ -285,7 +284,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class DgmmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DgmmBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp index 5241cb822..3dad54f33 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Prepare data. 
int64_t m, n, k; int64_t lda, ldb, ldc; @@ -135,23 +135,22 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { for (i = 0; i < batch_size_ref; i++) { ::gemm(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), - convert_to_cblas_trans(transb), (const int *)&m_ref, (const int *)&n_ref, - (const int *)&k_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A_ref.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(B_ref.data() + stride_b * i), (const int *)&ldb_ref, - (const fp_ref *)&beta, (fp_ref *)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + convert_to_cblas_trans(transb), (const int*)&m_ref, (const int*)&n_ref, + (const int*)&k_ref, (const fp_ref*)&alpha, + (const fp_ref*)(A_ref.data() + stride_a * i), (const int*)&lda_ref, + (const fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref, + (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ GEMM_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -199,17 +198,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { #endif main_queue.wait_and_throw(); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -232,7 +231,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class GemmBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchStrideTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp index 97f2dd086..12a5a4f61 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -136,10 +136,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { C_ref.resize(stride_c * batch_size); C_cast_ref.resize(stride_c * batch_size); - Ta **a_array = (Ta **)oneapi::mkl::malloc_shared(64, sizeof(Ta *) * batch_size, *dev, cxt); - Tb **b_array = (Tb **)oneapi::mkl::malloc_shared(64, sizeof(Tb *) * batch_size, *dev, cxt); - Tc **c_array = (Tc **)oneapi::mkl::malloc_shared(64, sizeof(Tc *) * batch_size, *dev, cxt); - Ts **c_ref_array = (Ts **)oneapi::mkl::malloc_shared(64, sizeof(Ts *) * batch_size, *dev, cxt); + Ta** a_array = (Ta**)oneapi::mkl::malloc_shared(64, sizeof(Ta*) * batch_size, *dev, cxt); + Tb** b_array = (Tb**)oneapi::mkl::malloc_shared(64, sizeof(Tb*) * batch_size, *dev, cxt); + Tc** c_array = (Tc**)oneapi::mkl::malloc_shared(64, sizeof(Tc*) * batch_size, *dev, cxt); + Ts** c_ref_array = (Ts**)oneapi::mkl::malloc_shared(64, sizeof(Ts*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -181,12 +181,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { ::gemm(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), - convert_to_cblas_trans(transb), (const int *)&m_ref, (const int *)&n_ref, - (const int *)&k_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A_ref.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(B_ref.data() + stride_b * i), (const int *)&ldb_ref, - (const fp_ref *)&beta, (fp_ref 
*)(C_ref.data() + stride_c * i), - (const int *)&ldc_ref); + convert_to_cblas_trans(transb), (const int*)&m_ref, (const int*)&n_ref, + (const int*)&k_ref, (const fp_ref*)&alpha, + (const fp_ref*)(A_ref.data() + stride_a * i), (const int*)&lda_ref, + (const fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref, + (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), (const int*)&ldc_ref); } // Call DPC++ GEMM_BATCH_STRIDE. @@ -226,13 +225,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { main_queue.wait_and_throw(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::free_shared(a_array, cxt); oneapi::mkl::free_shared(b_array, cxt); oneapi::mkl::free_shared(c_array, cxt); @@ -240,7 +239,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -267,7 +266,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class GemmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchStrideUsmTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp index a651f9ae3..a78bbb26f 100644 --- a/tests/unit_tests/blas/batch/gemm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/gemm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout 
layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -126,14 +126,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uaTap = usm_allocator(cxt, *dev); - auto uaTbp = usm_allocator(cxt, *dev); - auto uaTcp = usm_allocator(cxt, *dev); - auto uaTsp = usm_allocator(cxt, *dev); - vector a_array(uaTap); - vector b_array(uaTbp); - vector c_array(uaTcp), c_cast_ref_array(uaTcp); - vector a_ref_array(uaTsp), b_ref_array(uaTsp), c_ref_array(uaTsp); + auto uaTap = usm_allocator(cxt, *dev); + auto uaTbp = usm_allocator(cxt, *dev); + auto uaTcp = usm_allocator(cxt, *dev); + auto uaTsp = usm_allocator(cxt, *dev); + vector a_array(uaTap); + vector b_array(uaTbp); + vector c_array(uaTcp), c_cast_ref_array(uaTcp); + vector a_ref_array(uaTsp), b_ref_array(uaTsp), c_ref_array(uaTsp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); c_array.resize(total_batch_count); @@ -158,14 +158,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (Ta *)oneapi::mkl::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt); - b_array[idx] = (Tb *)oneapi::mkl::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt); - c_array[idx] = (Tc *)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); - a_ref_array[idx] = (Ts *)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt); - b_ref_array[idx] = (Ts *)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_b, 
*dev, cxt); + a_array[idx] = (Ta*)oneapi::mkl::malloc_shared(64, sizeof(Ta) * size_a, *dev, cxt); + b_array[idx] = (Tb*)oneapi::mkl::malloc_shared(64, sizeof(Tb) * size_b, *dev, cxt); + c_array[idx] = (Tc*)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); + a_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_a, *dev, cxt); + b_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_b, *dev, cxt); c_cast_ref_array[idx] = - (Tc *)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); - c_ref_array[idx] = (Ts *)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt); + (Tc*)oneapi::mkl::malloc_shared(64, sizeof(Tc) * size_c, *dev, cxt); + c_ref_array[idx] = (Ts*)oneapi::mkl::malloc_shared(64, sizeof(Ts) * size_c, *dev, cxt); rand_matrix(a_array[idx], layout, transa[i], m[i], k[i], lda[i]); rand_matrix(b_array[idx], layout, transb[i], k[i], n[i], ldb[i]); rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldc[i]); @@ -179,18 +179,18 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { // Call reference GEMM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *k_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *ldb_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - - CBLAS_TRANSPOSE *transa_ref = - (CBLAS_TRANSPOSE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); - CBLAS_TRANSPOSE *transb_ref = - (CBLAS_TRANSPOSE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* k_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* ldb_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + + CBLAS_TRANSPOSE* transa_ref = + (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_TRANSPOSE* transb_ref = + (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) || (ldb_ref == NULL) || (ldc_ref == NULL) || (transa_ref == NULL) || (transb_ref == NULL) || @@ -233,11 +233,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { group_size_ref[i] = 
(int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::gemm(convert_to_cblas_layout(layout), transa_ref[i], transb_ref[i], - (const int *)&m_ref[i], (const int *)&n_ref[i], (const int *)&k_ref[i], - (const fp_ref *)&alpha[i], (const fp_ref *)a_ref_array[idx], - (const int *)&lda_ref[i], (const fp_ref *)b_ref_array[idx], - (const int *)&ldb_ref[i], (const fp_ref *)&beta[i], (fp_ref *)c_ref_array[idx], - (const int *)&ldc_ref[i]); + (const int*)&m_ref[i], (const int*)&n_ref[i], (const int*)&k_ref[i], + (const fp_ref*)&alpha[i], (const fp_ref*)a_ref_array[idx], + (const int*)&lda_ref[i], (const fp_ref*)b_ref_array[idx], + (const int*)&ldb_ref[i], (const fp_ref*)&beta[i], (fp_ref*)c_ref_array[idx], + (const int*)&ldc_ref[i]); idx++; } } @@ -250,13 +250,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::gemm_batch( main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb **)&b_array[0], &ldb[0], &beta[0], + (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::gemm_batch( main_queue, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb **)&b_array[0], &ldb[0], &beta[0], + (const Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -267,14 +267,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemm_batch, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Tb **)&b_array[0], + (const 
Ta**)&a_array[0], &lda[0], (const Tb**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::gemm_batch, &transa[0], &transb[0], &m[0], &n[0], &k[0], &alpha[0], - (const Ta **)&a_array[0], &lda[0], (const Ta **)&b_array[0], + (const Ta**)&a_array[0], &lda[0], (const Ta**)&b_array[0], &ldb[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; @@ -283,13 +283,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait_and_throw(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::aligned_free(m_ref); oneapi::mkl::aligned_free(n_ref); oneapi::mkl::aligned_free(k_ref); @@ -315,7 +315,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMM_BATCH:\n" << error.what() << std::endl; } @@ -364,7 +364,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class GemmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemmBatchUsmTests, RealHalfPrecision) { EXPECT_TRUEORSKIP((test( diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp index bd92f70ca..f50686c13 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; 
namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda; @@ -103,23 +103,22 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int *)&m_ref, - (const int *)&n_ref, (const fp_ref *)&alpha, - (const fp_ref *)(A.data() + stride_a * i), (const int *)&lda_ref, - (const fp_ref *)(x.data() + stride_x * i), (const int *)&incx_ref, - (const fp_ref *)&beta, (fp_ref *)(y_ref.data() + stride_y * i), - (const int *)&incy_ref); + ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int*)&m_ref, + (const int*)&n_ref, (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)(x.data() + stride_x * i), + (const int*)&incx_ref, (const fp_ref*)&beta, (fp_ref*)(y_ref.data() + stride_y * i), + (const int*)&incy_ref); } // Call DPC++ GEMV_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -164,17 +163,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -192,7 +191,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } class GemvBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5)); diff --git a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp index d6eb47887..a61d7d318 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -128,10 +128,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in int batch_size_ref = (int)batch_size; for (i = 0; i < batch_size_ref; i++) { - ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int *)&m_ref, - (const int *)&n_ref, (const fp_ref *)&alpha, (const fp_ref *)&A[stride_a * i], - (const int *)&lda_ref, (const fp_ref *)&x[stride_x * i], (const int *)&incx_ref, - (const fp_ref *)&beta, (fp_ref *)&y_ref[stride_y * i], (const int *)&incy_ref); + ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), (const int*)&m_ref, + (const int*)&n_ref, (const fp_ref*)&alpha, (const fp_ref*)&A[stride_a * i], + (const int*)&lda_ref, (const fp_ref*)&x[stride_x * i], (const int*)&incx_ref, + (const fp_ref*)&beta, (fp_ref*)&y_ref[stride_y * i], (const int*)&incy_ref); } // Call DPC++ GEMV_BATCH_STRIDE. 
@@ -171,17 +171,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -197,7 +197,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t incx, int64_t incy, in } class GemvBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 2, 3, 5)); diff --git a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp index 4ad661f5b..2d257d0be 100644 --- a/tests/unit_tests/blas/batch/gemv_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/gemv_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -119,9 +119,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), x_array(uafpp), y_array(uafpp), - y_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), x_array(uafpp), y_array(uafpp), y_ref_array(uafpp); a_array.resize(total_batch_count); x_array.resize(total_batch_count); y_array.resize(total_batch_count); @@ -135,10 +134,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { size_x = 1 + (x_len - 1) * std::abs(incx[i]); size_y = 1 + (y_len - 1) * std::abs(incy[i]); for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - x_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); - y_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); - y_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); + a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + x_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_x, *dev, cxt); + y_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); + y_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_y, *dev, cxt); rand_matrix(a_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], lda[i]); rand_vector(x_array[idx], x_len, incx[i]); rand_vector(y_array[idx], y_len, incy[i]); @@ -149,15 +148,15 @@ int test(device *dev, 
oneapi::mkl::layout layout, int64_t group_count) { // Call reference GEMV_BATCH. using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *incx_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *incy_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* incx_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* incy_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_TRANSPOSE *transa_ref = - (CBLAS_TRANSPOSE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_TRANSPOSE* transa_ref = + (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (incx_ref == NULL) || (incy_ref == NULL) || (transa_ref == NULL) || (group_size_ref == NULL)) { @@ -191,11 +190,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { incy_ref[i] = (int)incy[i]; group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { - ::gemv(convert_to_cblas_layout(layout), transa_ref[i], (const int *)&m_ref[i], - (const int *)&n_ref[i], (const fp_ref *)&alpha[i], (const fp_ref *)a_array[idx], - (const int *)&lda_ref[i], (const fp_ref *)x_array[idx], - (const int *)&incx_ref[i], (const 
fp_ref *)&beta[i], (fp_ref *)y_ref_array[idx], - (const int *)&incy_ref[i]); + ::gemv(convert_to_cblas_layout(layout), transa_ref[i], (const int*)&m_ref[i], + (const int*)&n_ref[i], (const fp_ref*)&alpha[i], (const fp_ref*)a_array[idx], + (const int*)&lda_ref[i], (const fp_ref*)x_array[idx], (const int*)&incx_ref[i], + (const fp_ref*)&beta[i], (fp_ref*)y_ref_array[idx], (const int*)&incy_ref[i]); idx++; } } @@ -207,14 +205,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { switch (layout) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::gemv_batch( - main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], + main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::gemv_batch( - main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp **)&a_array[0], - &lda[0], (const fp **)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], + main_queue, &transa[0], &m[0], &n[0], &alpha[0], (const fp**)&a_array[0], + &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; default: break; @@ -225,29 +223,28 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::gemv_batch, &transa[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], (const fp **)&x_array[0], + (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0], &incx[0], &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: - TEST_RUN_BLAS_CT_SELECT(main_queue, 
oneapi::mkl::blas::row_major::gemv_batch, - &transa[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], (const fp **)&x_array[0], - &incx[0], &beta[0], &y_array[0], &incy[0], group_count, - &group_size[0], dependencies); + TEST_RUN_BLAS_CT_SELECT( + main_queue, oneapi::mkl::blas::row_major::gemv_batch, &transa[0], &m[0], &n[0], + &alpha[0], (const fp**)&a_array[0], &lda[0], (const fp**)&x_array[0], &incx[0], + &beta[0], &y_array[0], &incy[0], group_count, &group_size[0], dependencies); break; default: break; } main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::aligned_free(m_ref); oneapi::mkl::aligned_free(n_ref); oneapi::mkl::aligned_free(lda_ref); @@ -268,7 +265,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV_BATCH:\n" << error.what() << std::endl; } @@ -306,7 +303,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class GemvBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp index ac8bbb2b4..a6e9a6fe5 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/imatcopy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device 
*dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -101,11 +101,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -146,17 +146,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -171,7 +171,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class ImatcopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp index b3099d309..db40e3a1f 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/imatcopy_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; 
-extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { AB.resize(stride * batch_size); AB_ref.resize(stride * batch_size); - fp **ab_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **ab_ref_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** ab_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** ab_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((ab_array == NULL) || (ab_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; oneapi::mkl::free_shared(ab_array, cxt); @@ -166,19 +166,19 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::free_shared(ab_array, cxt); oneapi::mkl::free_shared(ab_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of 
IMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -194,7 +194,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class ImatcopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp index 74c9881af..d203f2440 100644 --- a/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/imatcopy_batch_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector ab_array(uafpp), ab_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector ab_array(uafpp), ab_ref_array(uafpp); ab_array.resize(total_batch_count); ab_ref_array.resize(total_batch_count); @@ -126,8 +126,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } size = std::max(size_a, size_b); for (j = 0; j < group_size[i]; j++) { - ab_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, 
cxt); - ab_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, cxt); + ab_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, cxt); + ab_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size, *dev, cxt); rand_matrix(ab_array[idx], oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size, 1, size); copy_matrix(ab_array[idx], oneapi::mkl::layout::col_major, @@ -187,13 +187,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -205,7 +205,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY_BATCH:\n" << error.what() << std::endl; } @@ -249,7 +249,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class ImatcopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp index cc20d0e3b..f036d0bbb 100644 --- a/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/omatadd_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, 
oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb, ldc; @@ -111,11 +111,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -162,17 +162,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -187,7 +187,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class OmataddBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp index 7388084cb..59cd4ced3 100644 --- a/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/omatadd_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern 
std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -110,10 +110,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { C.resize(stride_c * batch_size); C_ref.resize(stride_c * batch_size); - fp **a_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_ref_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -188,13 +188,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD_BATCH_STRIDE:\n" << e.what() << std::endl; 
print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::free_shared(a_array, cxt); oneapi::mkl::free_shared(b_array, cxt); oneapi::mkl::free_shared(c_array, cxt); @@ -202,7 +202,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -220,7 +220,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class OmataddBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp index d08329fc6..16b407890 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -101,11 +101,11 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -149,17 +149,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -174,7 +174,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class OmatcopyBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp index 7479b57db..9533a3030 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_stride_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -105,9 +105,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { B.resize(stride_b * batch_size); B_ref.resize(stride_b * batch_size); - fp **a_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **b_ref_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** b_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (b_array == NULL) || (b_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -175,20 +175,20 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::free_shared(a_array, cxt); oneapi::mkl::free_shared(b_array, cxt); oneapi::mkl::free_shared(b_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ 
-205,7 +205,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class OmatcopyBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp index 7f1e4a103..e0eb3feaa 100644 --- a/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/omatcopy_batch_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -103,8 +103,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); @@ -126,9 +126,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - b_array[idx] = (fp 
*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); - b_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + b_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + b_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); rand_matrix(a_array[idx], oneapi::mkl::layout::col_major, oneapi::mkl::transpose::nontrans, size_a, 1, size_a); rand_matrix(b_array[idx], oneapi::mkl::layout::col_major, @@ -161,14 +161,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::omatcopy_batch( main_queue, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), ldb.data(), - group_count, group_size.data(), dependencies); + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, + group_size.data(), dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::omatcopy_batch( main_queue, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), ldb.data(), - group_count, group_size.data(), dependencies); + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, + group_size.data(), dependencies); break; default: break; } @@ -178,13 +178,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::omatcopy_batch, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, group_size.data(), dependencies); break; case oneapi::mkl::layout::row_major: 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::omatcopy_batch, trans.data(), m.data(), n.data(), alpha.data(), - (const fp **)a_array.data(), lda.data(), b_array.data(), + (const fp**)a_array.data(), lda.data(), b_array.data(), ldb.data(), group_count, group_size.data(), dependencies); break; default: break; @@ -192,13 +192,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { idx = 0; for (i = 0; i < group_count; i++) { for (j = 0; j < group_size[i]; j++) { @@ -211,7 +211,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY_BATCH:\n" << error.what() << std::endl; } @@ -255,7 +255,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class OmatcopyBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp index 58dc4d7dc..aeb33c42e 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) 
{ // Prepare data. int64_t n, k; int64_t lda, ldc; @@ -67,9 +67,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { upper_lower = (oneapi::mkl::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans - : oneapi::mkl::transpose::conjtrans; + trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans + : oneapi::mkl::transpose::conjtrans; } else { trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans @@ -110,21 +110,21 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { for (i = 0; i < batch_size_ref; i++) { ::syrk(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), - convert_to_cblas_trans(trans), (const int *)&n_ref, (const int *)&k_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)&beta, - (fp_ref *)(C_ref.data() + stride_c * i), (const int *)&ldc_ref); + convert_to_cblas_trans(trans), (const int*)&n_ref, (const int*)&k_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), + (const int*)&ldc_ref); } // Call DPC++ SYRK_BATCH_STRIDE. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -168,17 +168,17 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -194,7 +194,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class SyrkBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp index 31aa09b79..b1f66fa07 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { +int test(device* dev, oneapi::mkl::layout layout, int64_t batch_size) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -86,9 +86,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { beta = rand_scalar(); upper_lower = (oneapi::mkl::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans - : oneapi::mkl::transpose::conjtrans; + trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans + : oneapi::mkl::transpose::conjtrans; } else { trans = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans @@ -116,9 +116,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { C.resize(stride_c * batch_size); C_ref.resize(stride_c * batch_size); - fp **a_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); - fp **c_ref_array = (fp **)oneapi::mkl::malloc_shared(64, sizeof(fp *) * batch_size, *dev, cxt); + fp** a_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); + fp** c_ref_array = (fp**)oneapi::mkl::malloc_shared(64, sizeof(fp*) * batch_size, *dev, cxt); if ((a_array == NULL) || (c_array == NULL) || (c_ref_array == NULL)) { std::cout << "Error cannot allocate arrays of pointers\n"; @@ -150,10 +150,10 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { int batch_size_ref = (int)batch_size; for 
(i = 0; i < batch_size_ref; i++) { ::syrk(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), - convert_to_cblas_trans(trans), (const int *)&n_ref, (const int *)&k_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (const fp_ref *)&beta, - (fp_ref *)(C_ref.data() + stride_c * i), (const int *)&ldc_ref); + convert_to_cblas_trans(trans), (const int*)&n_ref, (const int*)&k_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (const fp_ref*)&beta, (fp_ref*)(C_ref.data() + stride_c * i), + (const int*)&ldc_ref); } // Call DPC++ SYRK_BATCH_STRIDE. @@ -191,20 +191,20 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::free_shared(a_array, cxt); oneapi::mkl::free_shared(c_array, cxt); oneapi::mkl::free_shared(c_ref_array, cxt); return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -221,7 +221,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t batch_size) { } class SyrkBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp index 36d0d6dd5..b331b4c66 100644 --- a/tests/unit_tests/blas/batch/syrk_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/syrk_batch_usm.cpp @@ -43,19 
+43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYRK_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -106,10 +106,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { beta[i] = rand_scalar(); upper_lower[i] = (oneapi::mkl::uplo)(std::rand() % 2); if ((std::is_same::value) || (std::is_same::value)) { - trans[i] = (std::rand() % 2) == 0 - ? oneapi::mkl::transpose::nontrans - : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans - : oneapi::mkl::transpose::conjtrans; + trans[i] = (std::rand() % 2) == 0 ? oneapi::mkl::transpose::nontrans + : (std::rand() % 2) == 0 ? oneapi::mkl::transpose::trans + : oneapi::mkl::transpose::conjtrans; } else { trans[i] = (std::rand() % 2) == 0 ? 
oneapi::mkl::transpose::nontrans @@ -118,8 +117,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), c_array(uafpp), c_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), c_array(uafpp), c_ref_array(uafpp); a_array.resize(total_batch_count); c_array.resize(total_batch_count); c_ref_array.resize(total_batch_count); @@ -138,9 +137,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { default: break; } for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - c_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); - c_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + c_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); + c_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_c, *dev, cxt); rand_matrix(a_array[idx], layout, trans[i], n[i], k[i], lda[i]); rand_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, n[i], n[i], ldc[i]); copy_matrix(c_array[idx], layout, oneapi::mkl::transpose::nontrans, n[i], n[i], ldc[i], @@ -151,16 +150,16 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { // Call reference SYRK_BATCH. 
using fp_ref = typename ref_type_info::type; - int *n_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *k_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *ldc_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* k_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* ldc_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - CBLAS_UPLO *upper_lower_ref = - (CBLAS_UPLO *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); - CBLAS_TRANSPOSE *trans_ref = - (CBLAS_TRANSPOSE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_UPLO* upper_lower_ref = + (CBLAS_UPLO*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); + CBLAS_TRANSPOSE* trans_ref = + (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); if ((n_ref == NULL) || (k_ref == NULL) || (lda_ref == NULL) || (ldc_ref == NULL) || (trans_ref == NULL) || (upper_lower_ref == NULL) || (group_size_ref == NULL)) { @@ -194,9 +193,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::syrk(convert_to_cblas_layout(layout), upper_lower_ref[i], trans_ref[i], - (const int *)&n_ref[i], (const int *)&k_ref[i], (const fp_ref *)&alpha[i], - (const fp_ref *)a_array[idx], (const int *)&lda_ref[i], (const fp_ref *)&beta[i], - (fp_ref *)c_ref_array[idx], (const int *)&ldc_ref[i]); + (const 
int*)&n_ref[i], (const int*)&k_ref[i], (const fp_ref*)&alpha[i], + (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], (const fp_ref*)&beta[i], + (fp_ref*)c_ref_array[idx], (const int*)&ldc_ref[i]); idx++; } } @@ -209,13 +208,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::syrk_batch( main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::syrk_batch( main_queue, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -226,13 +225,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::syrk_batch, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::syrk_batch, &upper_lower[0], &trans[0], &n[0], &k[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &beta[0], &c_array[0], + (const fp**)&a_array[0], &lda[0], &beta[0], &c_array[0], &ldc[0], group_count, &group_size[0], dependencies); break; default: break; @@ -240,13 +239,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch 
(exception const& e) { std::cout << "Caught synchronous SYCL exception during SYRK_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::aligned_free(n_ref); oneapi::mkl::aligned_free(k_ref); oneapi::mkl::aligned_free(lda_ref); @@ -266,7 +265,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYRK_BATCH:\n" << error.what() << std::endl; } @@ -301,7 +300,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class SyrkBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrkBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp index cde6aa367..c85e7a885 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_stride.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_stride.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. 
int64_t m, n; int64_t lda, ldb; @@ -116,21 +116,20 @@ int test(device *dev, oneapi::mkl::layout layout) { for (i = 0; i < batch_size_ref; i++) { ::trsm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - convert_to_cblas_diag(unit_nonunit), (const int *)&m_ref, (const int *)&n_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (fp_ref *)(B_ref.data() + stride_b * i), - (const int *)&ldb_ref); + convert_to_cblas_diag(unit_nonunit), (const int*)&m_ref, (const int*)&n_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref); } // Call DPC++ TRSM_BATCH_STRIDE. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -176,17 +175,17 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -201,7 +200,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class TrsmBatchStrideTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; 
TEST_P(TrsmBatchStrideTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp index d99836f87..1b518d5bb 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_stride_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); @@ -137,10 +137,9 @@ int test(device *dev, oneapi::mkl::layout layout) { for (i = 0; i < batch_size_ref; i++) { ::trsm(convert_to_cblas_layout(layout), convert_to_cblas_side(left_right), convert_to_cblas_uplo(upper_lower), convert_to_cblas_trans(trans), - convert_to_cblas_diag(unit_nonunit), (const int *)&m_ref, (const int *)&n_ref, - (const fp_ref *)&alpha, (const fp_ref *)(A.data() + stride_a * i), - (const int *)&lda_ref, (fp_ref *)(B_ref.data() + stride_b * i), - (const int *)&ldb_ref); + convert_to_cblas_diag(unit_nonunit), (const int*)&m_ref, (const int*)&n_ref, + (const fp_ref*)&alpha, (const fp_ref*)(A.data() + stride_a * i), + (const int*)&lda_ref, (fp_ref*)(B_ref.data() + stride_b * i), (const int*)&ldb_ref); } // Call DPC++ TRSM_BATCH_STRIDE. 
@@ -180,17 +179,17 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH_STRIDE:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH_STRIDE:\n" << error.what() << std::endl; } @@ -203,7 +202,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class TrsmBatchStrideUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(TrsmBatchStrideUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp index 747f59433..b7ddff8c8 100644 --- a/tests/unit_tests/blas/batch/trsm_batch_usm.cpp +++ b/tests/unit_tests/blas/batch/trsm_batch_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { +int test(device* dev, oneapi::mkl::layout layout, int64_t group_count) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during TRSM_BATCH:\n" << e.what() << std::endl; print_error_code(e); @@ -128,8 +128,8 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { total_batch_count += group_size[i]; } - auto uafpp = usm_allocator(cxt, *dev); - vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); + auto uafpp = usm_allocator(cxt, *dev); + vector a_array(uafpp), b_array(uafpp), b_ref_array(uafpp); a_array.resize(total_batch_count); b_array.resize(total_batch_count); @@ -141,9 +141,9 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { Arank = left_right[i] == oneapi::mkl::side::left ? m[i] : n[i]; size_b = ldb[i] * ((layout == oneapi::mkl::layout::col_major) ? n[i] : m[i]); for (j = 0; j < group_size[i]; j++) { - a_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); - b_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); - b_ref_array[idx] = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + a_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_a, *dev, cxt); + b_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); + b_ref_array[idx] = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp) * size_b, *dev, cxt); rand_trsm_matrix(a_array[idx], layout, trans[i], Arank, Arank, lda[i]); rand_matrix(b_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldb[i]); copy_matrix(b_array[idx], layout, oneapi::mkl::transpose::nontrans, m[i], n[i], ldb[i], @@ -154,20 +154,20 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { // Call reference TRSM_BATCH. 
using fp_ref = typename ref_type_info::type; - int *m_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *n_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *lda_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *ldb_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - int *group_size_ref = (int *)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); - - CBLAS_TRANSPOSE *trans_ref = - (CBLAS_TRANSPOSE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); - CBLAS_SIDE *left_right_ref = - (CBLAS_SIDE *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); - CBLAS_UPLO *upper_lower_ref = - (CBLAS_UPLO *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); - CBLAS_DIAG *unit_nonunit_ref = - (CBLAS_DIAG *)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count); + int* m_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* n_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* lda_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* ldb_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + int* group_size_ref = (int*)oneapi::mkl::aligned_alloc(64, sizeof(int) * group_count); + + CBLAS_TRANSPOSE* trans_ref = + (CBLAS_TRANSPOSE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_TRANSPOSE) * group_count); + CBLAS_SIDE* left_right_ref = + (CBLAS_SIDE*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_SIDE) * group_count); + CBLAS_UPLO* upper_lower_ref = + (CBLAS_UPLO*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_UPLO) * group_count); + CBLAS_DIAG* unit_nonunit_ref = + (CBLAS_DIAG*)oneapi::mkl::aligned_alloc(64, sizeof(CBLAS_DIAG) * group_count); if ((m_ref == NULL) || (n_ref == NULL) || (lda_ref == NULL) || (ldb_ref == NULL) || (trans_ref == NULL) || (left_right_ref == NULL) || (upper_lower_ref == NULL) || @@ -206,9 +206,9 @@ int 
test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { group_size_ref[i] = (int)group_size[i]; for (j = 0; j < group_size_ref[i]; j++) { ::trsm(convert_to_cblas_layout(layout), left_right_ref[i], upper_lower_ref[i], - trans_ref[i], unit_nonunit_ref[i], (const int *)&m_ref[i], - (const int *)&n_ref[i], (const fp_ref *)&alpha[i], (const fp_ref *)a_array[idx], - (const int *)&lda_ref[i], b_ref_array[idx], (const int *)&ldb_ref[i]); + trans_ref[i], unit_nonunit_ref[i], (const int*)&m_ref[i], (const int*)&n_ref[i], + (const fp_ref*)&alpha[i], (const fp_ref*)a_array[idx], (const int*)&lda_ref[i], + b_ref_array[idx], (const int*)&ldb_ref[i]); idx++; } } @@ -221,13 +221,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { case oneapi::mkl::layout::col_major: done = oneapi::mkl::blas::column_major::trsm_batch( main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], - &n[0], &alpha[0], (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: done = oneapi::mkl::blas::row_major::trsm_batch( main_queue, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], - &n[0], &alpha[0], (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + &n[0], &alpha[0], (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; default: break; @@ -239,14 +239,14 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::column_major::trsm_batch, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; case oneapi::mkl::layout::row_major: 
TEST_RUN_BLAS_CT_SELECT(main_queue, oneapi::mkl::blas::row_major::trsm_batch, &left_right[0], &upper_lower[0], &trans[0], &unit_nonunit[0], &m[0], &n[0], &alpha[0], - (const fp **)&a_array[0], &lda[0], &b_array[0], &ldb[0], + (const fp**)&a_array[0], &lda[0], &b_array[0], &ldb[0], group_count, &group_size[0], dependencies); break; default: break; @@ -254,13 +254,13 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during TRSM_BATCH:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { oneapi::mkl::aligned_free(m_ref); oneapi::mkl::aligned_free(n_ref); oneapi::mkl::aligned_free(lda_ref); @@ -282,7 +282,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of TRSM_BATCH:\n" << error.what() << std::endl; } @@ -319,7 +319,7 @@ int test(device *dev, oneapi::mkl::layout layout, int64_t group_count) { } class TrsmBatchUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(TrsmBatchUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()), 5)); diff --git a/tests/unit_tests/blas/extensions/imatcopy.cpp b/tests/unit_tests/blas/extensions/imatcopy.cpp index e21702775..ba9400817 100644 --- a/tests/unit_tests/blas/extensions/imatcopy.cpp +++ b/tests/unit_tests/blas/extensions/imatcopy.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. 
int64_t m, n; int64_t lda, ldb; @@ -95,11 +95,11 @@ int test(device *dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -138,17 +138,17 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY:\n" << error.what() << std::endl; } @@ -162,7 +162,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class ImatcopyTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp index dc3d43d2e..1acf4ecaf 100644 --- a/tests/unit_tests/blas/extensions/imatcopy_usm.cpp +++ b/tests/unit_tests/blas/extensions/imatcopy_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -145,17 +145,17 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during IMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of IMATCOPY:\n" << error.what() << std::endl; } @@ -167,7 +167,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class ImatcopyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(ImatcopyUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatadd.cpp b/tests/unit_tests/blas/extensions/omatadd.cpp index b2af98935..7e76f74f9 100644 --- a/tests/unit_tests/blas/extensions/omatadd.cpp +++ b/tests/unit_tests/blas/extensions/omatadd.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb, ldc; @@ -106,11 +106,11 @@ int test(device *dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); @@ -155,16 +155,16 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD:\n" << error.what() << std::endl; } @@ -178,7 +178,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class OmataddTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatadd_usm.cpp b/tests/unit_tests/blas/extensions/omatadd_usm.cpp index 783f985b2..eff40ae8d 100644 --- a/tests/unit_tests/blas/extensions/omatadd_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatadd_usm.cpp @@ -43,19 +43,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); @@ -161,16 +161,16 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATADD:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATADD:\n" << error.what() << std::endl; } @@ -182,7 +182,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class OmataddUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmataddUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy.cpp b/tests/unit_tests/blas/extensions/omatcopy.cpp index 122ba2c79..1ba35d057 100644 --- a/tests/unit_tests/blas/extensions/omatcopy.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -103,11 +103,11 @@ int test(device *dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -147,17 +147,17 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY:\n" << error.what() << std::endl; } @@ -171,7 +171,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class OmatcopyTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy2.cpp b/tests/unit_tests/blas/extensions/omatcopy2.cpp index d0407c324..3bc7dfccb 100644 --- a/tests/unit_tests/blas/extensions/omatcopy2.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy2.cpp @@ -43,12 +43,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. int64_t m, n; int64_t lda, ldb; @@ -100,11 +100,11 @@ int test(device *dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); @@ -146,17 +146,17 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY2:\n" << error.what() << std::endl; } @@ -170,7 +170,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class Omatcopy2Tests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Omatcopy2Tests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp index d2103d243..3dcf87dc1 100644 --- a/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy2_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); @@ -157,17 +157,17 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY2:\n" << error.what() << std::endl; } @@ -179,7 +179,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class Omatcopy2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Omatcopy2UsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp index ac9ba2d5c..b217e2f54 100644 --- a/tests/unit_tests/blas/extensions/omatcopy_usm.cpp +++ b/tests/unit_tests/blas/extensions/omatcopy_usm.cpp @@ -44,19 +44,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); @@ -147,17 +147,17 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during OMATCOPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of OMATCOPY:\n" << error.what() << std::endl; } @@ -169,7 +169,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class OmatcopyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(OmatcopyUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/include/reference_blas_templates.hpp b/tests/unit_tests/blas/include/reference_blas_templates.hpp index 6d184ba75..de7e36d40 100644 --- a/tests/unit_tests/blas/include/reference_blas_templates.hpp +++ b/tests/unit_tests/blas/include/reference_blas_templates.hpp @@ -33,8 +33,8 @@ inline bool isNonTranspose(CBLAS_TRANSPOSE trans) { } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, - int col, int ld, T_dest *&dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, + int col, int ld, T_dest*& dest) { int i, j, Iend, Jend; if (layout == CblasColMajor) { Jend = isNonTranspose(trans) ? 
col : row; @@ -53,8 +53,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE tra } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, - int col, int ld, T_dest off, T_dest *&dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, int row, + int col, int ld, T_dest off, T_dest*& dest) { int i, j, Iend, Jend; if (layout == CblasColMajor) { Jend = isNonTranspose(trans) ? col : row; @@ -73,8 +73,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, CBLAS_TRANSPOSE tra } template -static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, int row, int col, int ld, - CBLAS_OFFSET off_kind, T_off off, T_dest &dest) { +static inline void copy_mat(T_src& src, CBLAS_LAYOUT layout, int row, int col, int ld, + CBLAS_OFFSET off_kind, T_off off, T_dest& dest) { using T_data = typename std::remove_reference::type; int i, j; T_data tmp; @@ -110,8 +110,8 @@ static inline void copy_mat(T_src &src, CBLAS_LAYOUT layout, int row, int col, i } template -static inline void update_c(T_src &src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, int row, - int col, int ld, T_desc *&dest) { +static inline void update_c(T_src& src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, int row, + int col, int ld, T_desc*& dest) { int i, j; int Jend = (layout == CblasColMajor) ? 
col : row; @@ -139,15 +139,15 @@ static inline void update_c(T_src &src, CBLAS_LAYOUT layout, CBLAS_UPLO upper_lo /* Level 3 */ template -static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const fp *alpha, const fp *a, const int *lda, - const fp *b, const int *ldb, const fp *beta, fp *c, const int *ldc); +static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const fp* alpha, const fp* a, const int* lda, + const fp* b, const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const sycl::half *alpha, const sycl::half *a, const int *lda, - const sycl::half *b, const int *ldb, const sycl::half *beta, sycl::half *c, - const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const sycl::half* alpha, const sycl::half* a, const int* lda, + const sycl::half* b, const int* ldb, const sycl::half* beta, sycl::half* c, + const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb, sizec; const float alphaf = *alpha; @@ -162,9 +162,9 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - float *af = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); - float *cf = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec); + float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); + float* cf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); copy_mat(c, layout, CblasNoTrans, *m, *n, *ldc, cf); @@ -177,49 +177,49 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const float *a, const int *lda, - const float *b, const int *ldb, const float *beta, float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const float* a, const int* lda, + const float* b, const int* ldb, const float* beta, float* c, const int* ldc) { cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const double *alpha, const double *a, const int *lda, - const double *b, const int *ldb, const double *beta, double *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const double* alpha, const double* a, const int* lda, + const double* b, const int* ldb, const double* beta, double* c, const int* ldc) { cblas_dgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, a, *lda, b, 
*ldb, *beta, c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, const int *ldc) { - cblas_cgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, const int* ldc) { + cblas_cgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void*)alpha, (const void*)a, + *lda, (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { - cblas_zgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { + cblas_zgemm_wrapper(layout, transa, transb, *m, *n, *k, (const void*)alpha, (const void*)a, + *lda, (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - 
const int *n, const int *k, const fpc *alpha, const fpa *a, const int *lda, - const fpa *b, const int *ldb, const fpc *beta, fpc *c, const int *ldc); +static void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const fpc* alpha, const fpa* a, const int* lda, + const fpa* b, const int* ldb, const fpc* beta, fpc* c, const int* ldc); template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const sycl::half *a, const int *lda, - const sycl::half *b, const int *ldb, const float *beta, float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const sycl::half* a, const int* lda, + const sycl::half* b, const int* ldb, const float* beta, float* c, const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb; if (layout == CblasColMajor) { @@ -230,8 +230,8 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k; sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; } - float *af = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); + float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c, @@ -241,10 +241,10 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template <> -void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int *m, - const int *n, const int *k, const float *alpha, const oneapi::mkl::bfloat16 *a, - const int *lda, const oneapi::mkl::bfloat16 *b, const int *ldb, const float *beta, - float *c, const int *ldc) { +void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int* m, + const int* n, const int* k, const float* alpha, const oneapi::mkl::bfloat16* a, + const int* lda, const oneapi::mkl::bfloat16* b, const int* ldb, const float* beta, + float* c, const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. int sizea, sizeb; if (layout == CblasColMajor) { @@ -255,8 +255,8 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c sizea = (transa == CblasNoTrans) ? *lda * *m : *lda * *k; sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; } - float *af = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); - float *bf = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); + float* af = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizea); + float* bf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizeb); copy_mat(a, layout, transa, *m, *k, *lda, af); copy_mat(b, layout, transb, *k, *n, *ldb, bf); cblas_sgemm_wrapper(layout, transa, transb, *m, *n, *k, *alpha, af, *lda, bf, *ldb, *beta, c, @@ -266,1146 +266,1142 @@ void gemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, c } template -static void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, - const int *n, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, + const int* n, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc) { +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const float* alpha, const float* a, const int* lda, const float* b, const int* ldb, + const float* beta, float* c, const int* ldc) { cblas_ssymm_wrapper(layout, left_right, uplo, *m, *n, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc) { +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, 
const int* m, const int* n, + const double* alpha, const double* a, const int* lda, const double* b, const int* ldb, + const double* beta, double* c, const int* ldc) { cblas_dsymm_wrapper(layout, left_right, uplo, *m, *n, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_csymm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_csymm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zsymm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void symm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zsymm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const 
void*)beta, (void*)c, *ldc); } template -static void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp *alpha, const fp *a, const int *lda, const fp *beta, fp *c, - const int *ldc); +static void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp* alpha, const fp* a, const int* lda, const fp* beta, fp* c, + const int* ldc); template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *beta, float *c, - const int *ldc) { +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const float* beta, float* c, + const int* ldc) { cblas_ssyrk_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, *beta, c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *beta, double *c, - const int *ldc) { +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* beta, double* c, + const int* ldc) { cblas_dsyrk_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, *beta, c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *beta, std::complex *c, const int *ldc) { - cblas_csyrk_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)beta, (void *)c, *ldc); +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + 
const std::complex* beta, std::complex* c, const int* ldc) { + cblas_csyrk_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)beta, (void*)c, *ldc); } template <> -void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *beta, std::complex *c, const int *ldc) { - cblas_zsyrk_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)beta, (void *)c, *ldc); +void syrk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* beta, std::complex* c, const int* ldc) { + cblas_zsyrk_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)beta, (void*)c, *ldc); } template -static void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, - const int *n, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, + const int* n, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_chemm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, 
const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_chemm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zhemm_wrapper(layout, left_right, uplo, *m, *n, (const void *)alpha, (const void *)a, - *lda, (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void hemm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zhemm_wrapper(layout, left_right, uplo, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp_scalar *alpha, const fp_data *a, const int *lda, - const fp_scalar *beta, fp_data *c, const int *ldc); +static void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp_scalar* alpha, const fp_data* a, const int* lda, + const fp_scalar* beta, fp_data* c, const int* ldc); template <> -void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const std::complex *a, const int *lda, const float *beta, - std::complex *c, const int *ldc) { - cblas_cherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void *)a, *lda, *beta, - (void *)c, *ldc); +void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, 
CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const std::complex* a, const int* lda, const float* beta, + std::complex* c, const int* ldc) { + cblas_cherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void*)a, *lda, *beta, (void*)c, + *ldc); } template <> -void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const std::complex *a, const int *lda, const double *beta, - std::complex *c, const int *ldc) { - cblas_zherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void *)a, *lda, *beta, - (void *)c, *ldc); +void herk(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const std::complex* a, const int* lda, const double* beta, + std::complex* c, const int* ldc) { + cblas_zherk_wrapper(layout, uplo, trans, *n, *k, *alpha, (const void*)a, *lda, *beta, (void*)c, + *ldc); } template -static void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp *alpha, const fp *a, const int *lda, const fp *b, - const int *ldb, const fp *beta, fp *c, const int *ldc); +static void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp* alpha, const fp* a, const int* lda, const fp* b, + const int* ldb, const fp* beta, fp* c, const int* ldc); template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *b, const int *ldb, - const float *beta, float *c, const int *ldc) { +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const float* b, const int* ldb, + const float* beta, float* c, const int* ldc) { cblas_ssyr2k_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> 
-void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *b, const int *ldb, - const double *beta, double *c, const int *ldc) { +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* b, const int* ldb, + const double* beta, double* c, const int* ldc) { cblas_dsyr2k_wrapper(layout, uplo, trans, *n, *k, *alpha, a, *lda, b, *ldb, *beta, c, *ldc); } template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_csyr2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_csyr2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template <> -void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const std::complex *beta, - std::complex *c, const int *ldc) { - cblas_zsyr2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, (const void *)beta, (void *)c, *ldc); +void syr2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + 
const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const std::complex* beta, + std::complex* c, const int* ldc) { + cblas_zsyr2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, (const void*)beta, (void*)c, *ldc); } template -static void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, - const int *k, const fp_data *alpha, const fp_data *a, const int *lda, - const fp_data *b, const int *ldb, const fp_scalar *beta, fp_data *c, - const int *ldc); +static void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, + const int* k, const fp_data* alpha, const fp_data* a, const int* lda, + const fp_data* b, const int* ldb, const fp_scalar* beta, fp_data* c, + const int* ldc); template <> -void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const float *beta, std::complex *c, - const int *ldc) { - cblas_cher2k_wrapper(layout, uplo, trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, *beta, (void *)c, *ldc); +void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const float* beta, std::complex* c, + const int* ldc) { + cblas_cher2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, *beta, (void*)c, *ldc); } template <> -void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *b, const int *ldb, const double *beta, - std::complex *c, const int *ldc) { - cblas_zher2k_wrapper(layout, uplo, 
trans, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)b, *ldb, *beta, (void *)c, *ldc); +void her2k(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* b, const int* ldb, const double* beta, + std::complex* c, const int* ldc) { + cblas_zher2k_wrapper(layout, uplo, trans, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)b, *ldb, *beta, (void*)c, *ldc); } template static void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const fp *alpha, const fp *a, - const int *lda, fp *b, const int *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const fp* alpha, const fp* a, + const int* lda, fp* b, const int* ldb); template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const float *alpha, const float *a, - const int *lda, float *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const float* alpha, const float* a, + const int* lda, float* b, const int* ldb) { cblas_strmm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const double *alpha, const double *a, - const int *lda, double *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const double* alpha, const double* a, + const int* lda, double* b, const int* ldb) { cblas_dtrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex 
*b, const int *ldb) { - cblas_ctrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ctrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template <> void trmm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ztrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ztrmm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template static void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const fp *alpha, const fp *a, - const int *lda, fp *b, const int *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const fp* alpha, const fp* a, + const int* lda, fp* b, const int* ldb); template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const float *alpha, const float *a, - const int *lda, float *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const float* alpha, const float* a, + const int* lda, float* b, const int* ldb) { cblas_strsm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, 
const int *m, const int *n, const double *alpha, const double *a, - const int *lda, double *b, const int *ldb) { + CBLAS_DIAG diag, const int* m, const int* n, const double* alpha, const double* a, + const int* lda, double* b, const int* ldb) { cblas_dtrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, *alpha, a, *lda, b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ctrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ctrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } template <> void trsm(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int *m, const int *n, const std::complex *alpha, - const std::complex *a, const int *lda, std::complex *b, const int *ldb) { - cblas_ztrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void *)alpha, - (const void *)a, *lda, (void *)b, *ldb); + CBLAS_DIAG diag, const int* m, const int* n, const std::complex* alpha, + const std::complex* a, const int* lda, std::complex* b, const int* ldb) { + cblas_ztrsm_wrapper(layout, side, uplo, transa, diag, *m, *n, (const void*)alpha, + (const void*)a, *lda, (void*)b, *ldb); } /* Level 2 */ template -static void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const fp* alpha, const 
fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const float* alpha, const float* a, const int* lda, const float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_sgemv_wrapper(layout, trans, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dgemv_wrapper(layout, trans, *m, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_cgemv_wrapper(layout, trans, *m, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_cgemv_wrapper(layout, trans, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const 
void*)beta, (void*)y, *incy); } template <> -void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zgemv_wrapper(layout, trans, *m, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gemv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zgemv_wrapper(layout, trans, *m, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, - int *ku, const fp *alpha, const fp *a, const int *lda, const fp *x, - const int *incx, const fp *beta, fp *y, const int *incy); +static void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, + int* ku, const fp* alpha, const fp* a, const int* lda, const fp* x, + const int* incx, const fp* beta, fp* y, const int* incy); template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const float* alpha, const float* a, const int* lda, const float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_sgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, 
const int *n, int *kl, int *ku, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_cgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_cgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int *m, const int *n, int *kl, int *ku, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void gbmv(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int* m, const int* n, int* kl, int* ku, + const std::complex* alpha, const 
std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zgbmv_wrapper(layout, trans, *m, *n, *kl, *ku, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const float *alpha, const float *x, - const int *incx, const float *y, const int *incy, float *a, const int *lda) { +void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const float* alpha, const float* x, + const int* incx, const float* y, const int* incy, float* a, const int* lda) { cblas_sger_wrapper(layout, *m, *n, *alpha, x, *incx, y, *incy, a, *lda); } template <> -void ger(CBLAS_LAYOUT layout, const int *m, const int *n, const double *alpha, const double *x, - const int *incx, const double *y, const int *incy, double *a, const int *lda) { +void ger(CBLAS_LAYOUT layout, const int* m, const int* n, const double* alpha, const double* x, + const int* incx, const double* y, const int* incy, double* a, const int* lda) { cblas_dger_wrapper(layout, *m, *n, *alpha, x, *incx, y, *incy, a, *lda); } template -static void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex 
*x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_cgerc_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_cgerc_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template <> -void gerc(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_zgerc_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void gerc(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_zgerc_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template -static void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const fp *alpha, const fp *x, - const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const fp* alpha, const fp* x, + const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_cgeru_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const 
std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_cgeru_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template <> -void geru(CBLAS_LAYOUT layout, const int *m, const int *n, const std::complex *alpha, - const std::complex *x, const int *incx, const std::complex *y, - const int *incy, std::complex *a, const int *lda) { - cblas_zgeru_wrapper(layout, *m, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void geru(CBLAS_LAYOUT layout, const int* m, const int* n, const std::complex* alpha, + const std::complex* x, const int* incx, const std::complex* y, + const int* incy, std::complex* a, const int* lda) { + cblas_zgeru_wrapper(layout, *m, *n, (const void*)alpha, (const void*)x, *incx, (const void*)y, + *incy, (void*)a, *lda); } template -static void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const fp* alpha, const fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chbmv_wrapper(layout, upper_lower, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, 
const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chbmv_wrapper(layout, upper_lower, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhbmv_wrapper(layout, upper_lower, *n, *k, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhbmv_wrapper(layout, upper_lower, *n, *k, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const int *lda, const fp *x, const int *incx, const fp *beta, fp *y, - const int *incy); +static void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const int* lda, const fp* x, const int* incx, const fp* beta, fp* y, + const int* incy); template <> -void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chemv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const 
std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chemv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template <> -void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, const int *lda, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhemv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, *lda, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hemv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, const int* lda, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhemv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, *lda, + (const void*)x, *incx, (const void*)beta, (void*)y, *incy); } template -static void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp_scalar *alpha, - const fp_data *x, const int *incx, fp_data *a, const int *lda); +static void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp_scalar* alpha, + const fp_data* x, const int* incx, fp_data* a, const int* lda); template <> -void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const std::complex *x, const int *incx, std::complex *a, const int *lda) { - cblas_cher_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a, *lda); +void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const std::complex* x, const int* incx, std::complex* a, const int* lda) { + cblas_cher_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a, *lda); } template <> -void 
her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const std::complex *x, const int *incx, std::complex *a, const int *lda) { - cblas_zher_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a, *lda); +void her(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const std::complex* x, const int* incx, std::complex* a, const int* lda) { + cblas_zher_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a, *lda); } template -static void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a, const int *lda) { - cblas_cher2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a, const int* lda) { + cblas_cher2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a, *lda); } template <> -void her2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a, const int *lda) { - cblas_zher2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a, *lda); +void her2(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a, const int* lda) { + cblas_zher2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a, *lda); } template -static void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const fp *x, const int *incx, const fp *beta, fp *y, const int *incy); +static void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const fp* x, const int* incx, const fp* beta, fp* y, const int* incy); template <> -void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_chpmv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, + const std::complex* x, const int* incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_chpmv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, (const void*)x, + *incx, (const void*)beta, (void*)y, *incy); } template <> -void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *a, - const std::complex *x, const int *incx, const std::complex *beta, - std::complex *y, const int *incy) { - cblas_zhpmv_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)a, - (const void *)x, *incx, (const void *)beta, (void *)y, *incy); +void hpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* a, + const std::complex* x, const int* 
incx, const std::complex* beta, + std::complex* y, const int* incy) { + cblas_zhpmv_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)a, (const void*)x, + *incx, (const void*)beta, (void*)y, *incy); } template -static void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp_scalar *alpha, - const fp_data *x, const int *incx, fp_data *a); +static void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp_scalar* alpha, + const fp_data* x, const int* incx, fp_data* a); template <> -void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const std::complex *x, const int *incx, std::complex *a) { - cblas_chpr_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a); +void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const std::complex* x, const int* incx, std::complex* a) { + cblas_chpr_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a); } template <> -void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const std::complex *x, const int *incx, std::complex *a) { - cblas_zhpr_wrapper(layout, upper_lower, *n, *alpha, (const void *)x, *incx, (void *)a); +void hpr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const std::complex* x, const int* incx, std::complex* a) { + cblas_zhpr_wrapper(layout, upper_lower, *n, *alpha, (const void*)x, *incx, (void*)a); } template -static void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a); +static void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a); template <> -void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, 
- const std::complex *y, const int *incy, std::complex *a) { - cblas_chpr2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a); +void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a) { + cblas_chpr2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a); } template <> -void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, - const std::complex *alpha, const std::complex *x, const int *incx, - const std::complex *y, const int *incy, std::complex *a) { - cblas_zhpr2_wrapper(layout, upper_lower, *n, (const void *)alpha, (const void *)x, *incx, - (const void *)y, *incy, (void *)a); +void hpr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, + const std::complex* alpha, const std::complex* x, const int* incx, + const std::complex* y, const int* incy, std::complex* a) { + cblas_zhpr2_wrapper(layout, upper_lower, *n, (const void*)alpha, (const void*)x, *incx, + (const void*)y, *incy, (void*)a); } template -static void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const fp *alpha, const fp *a, const int *lda, const fp *x, const int *incx, - const fp *beta, fp *y, const int *incy); +static void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const fp* alpha, const fp* a, const int* lda, const fp* x, const int* incx, + const fp* beta, fp* y, const int* incy); template <> -void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const float *alpha, const float *a, const int *lda, const float *x, const int *incx, - const float *beta, float *y, const int *incy) { +void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const float* alpha, const float* a, const int* lda, const 
float* x, const int* incx, + const float* beta, float* y, const int* incy) { cblas_ssbmv_wrapper(layout, upper_lower, *n, *k, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const int *k, - const double *alpha, const double *a, const int *lda, const double *x, const int *incx, - const double *beta, double *y, const int *incy) { +void sbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const int* k, + const double* alpha, const double* a, const int* lda, const double* x, const int* incx, + const double* beta, double* y, const int* incy) { cblas_dsbmv_wrapper(layout, upper_lower, *n, *k, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template -static void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const int *lda, const fp *x, const int *incx, const fp *beta, fp *y, - const int *incy); +static void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const int* lda, const fp* x, const int* incx, const fp* beta, fp* y, + const int* incy); template <> -void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *a, const int *lda, const float *x, const int *incx, const float *beta, - float *y, const int *incy) { +void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* a, const int* lda, const float* x, const int* incx, const float* beta, + float* y, const int* incy) { cblas_ssymv_wrapper(layout, upper_lower, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template <> -void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *a, const int *lda, const double *x, const int *incx, const double *beta, - double *y, const int *incy) { +void symv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* a, const 
int* lda, const double* x, const int* incx, const double* beta, + double* y, const int* incy) { cblas_dsymv_wrapper(layout, upper_lower, *n, *alpha, a, *lda, x, *incx, *beta, y, *incy); } template -static void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, fp *a, const int *lda); +static void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, fp* a, const int* lda); template <> -void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, float *a, const int *lda) { +void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, float* a, const int* lda) { cblas_ssyr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a, *lda); } template <> -void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, double *a, const int *lda) { +void syr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, double* a, const int* lda) { cblas_dsyr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a, *lda); } template -static void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a, const int *lda); +static void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a, const int* lda); template <> -void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, const float *y, const int *incy, float *a, - const int *lda) { +void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, const float* y, const 
int* incy, float* a, + const int* lda) { cblas_ssyr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a, *lda); } template <> -void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, const double *y, const int *incy, double *a, - const int *lda) { +void syr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, const double* y, const int* incy, double* a, + const int* lda) { cblas_dsyr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a, *lda); } template -static void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *a, const fp *x, const int *incx, const fp *beta, fp *y, const int *incy); +static void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* a, const fp* x, const int* incx, const fp* beta, fp* y, const int* incy); template <> -void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *a, const float *x, const int *incx, const float *beta, float *y, - const int *incy) { +void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* a, const float* x, const int* incx, const float* beta, float* y, + const int* incy) { cblas_sspmv_wrapper(layout, upper_lower, *n, *alpha, a, x, *incx, *beta, y, *incy); } template <> -void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *a, const double *x, const int *incx, const double *beta, double *y, - const int *incy) { +void spmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* a, const double* x, const int* incx, const double* beta, double* y, + const int* incy) { cblas_dspmv_wrapper(layout, upper_lower, *n, *alpha, a, x, *incx, *beta, y, *incy); } template -static void spr(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, fp *a); +static void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, fp* a); template <> -void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, float *a) { +void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, float* a) { cblas_sspr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a); } template <> -void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, double *a) { +void spr(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* x, const int* incx, double* a) { cblas_dspr_wrapper(layout, upper_lower, *n, *alpha, x, *incx, a); } template -static void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const fp *alpha, - const fp *x, const int *incx, const fp *y, const int *incy, fp *a); +static void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const fp* alpha, + const fp* x, const int* incx, const fp* y, const int* incy, fp* a); template <> -void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const float *alpha, - const float *x, const int *incx, const float *y, const int *incy, float *a) { +void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const float* alpha, + const float* x, const int* incx, const float* y, const int* incy, float* a) { cblas_sspr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a); } template <> -void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int *n, const double *alpha, - const double *x, const int *incx, const double *y, const int *incy, double *a) { +void spr2(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int* n, const double* alpha, + const double* 
x, const int* incx, const double* y, const int* incy, double* a) { cblas_dspr2_wrapper(layout, upper_lower, *n, *alpha, x, *incx, y, *incy, a); } template static void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const int *k, const fp *a, const int *lda, - fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const int* k, const fp* a, const int* lda, + fp* x, const int* incx); template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const int* k, const float* a, const int* lda, float* x, const int* incx) { cblas_stbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const int* k, const double* a, const int* lda, double* x, const int* incx) { cblas_dtbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ctbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ctbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template <> void tbmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - 
cblas_ztbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ztbmv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template static void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const int *k, const fp *a, const int *lda, - fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const int* k, const fp* a, const int* lda, + fp* x, const int* incx); template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const int* k, const float* a, const int* lda, float* x, const int* incx) { cblas_stbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const int* k, const double* a, const int* lda, double* x, const int* incx) { cblas_dtbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, a, *lda, x, *incx); } template <> void tbsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ctbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ctbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template <> void tbsv(CBLAS_LAYOUT 
layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const int *k, const std::complex *a, const int *lda, - std::complex *x, const int *incx) { - cblas_ztbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void *)a, *lda, - (void *)x, *incx); + const int* n, const int* k, const std::complex* a, const int* lda, + std::complex* x, const int* incx) { + cblas_ztbsv_wrapper(layout, upper_lower, trans, unit_diag, *n, *k, (const void*)a, *lda, + (void*)x, *incx); } template static void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, fp* x, const int* incx); template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, float *x, const int *incx) { + const int* n, const float* a, float* x, const int* incx) { cblas_stpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, double *x, const int *incx) { + const int* n, const double* a, double* x, const int* incx) { cblas_dtpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ctpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ctpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template <> void tpmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const 
std::complex *a, std::complex *x, const int *incx) { - cblas_ztpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ztpmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template static void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, fp *x, const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, fp* x, const int* incx); template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, float *x, const int *incx) { + const int* n, const float* a, float* x, const int* incx) { cblas_stpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, double *x, const int *incx) { + const int* n, const double* a, double* x, const int* incx) { cblas_dtpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ctpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* a, std::complex* x, const int* incx) { + cblas_ctpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template <> void tpsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, std::complex *x, const int *incx) { - cblas_ztpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, (void *)x, - *incx); + const int* n, const std::complex* 
a, std::complex* x, const int* incx) { + cblas_ztpsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, (void*)x, *incx); } template static void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, const int *lda, fp *x, - const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, const int* lda, fp* x, + const int* incx); template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const float* a, const int* lda, float* x, const int* incx) { cblas_strmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const double* a, const int* lda, double* x, const int* incx) { cblas_dtrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ctrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, std::complex* x, + const int* incx) { + cblas_ctrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template <> void trmv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ztrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, 
std::complex* x, + const int* incx) { + cblas_ztrmv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template static void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int *n, const fp *a, const int *lda, fp *x, - const int *incx); + CBLAS_DIAG unit_diag, const int* n, const fp* a, const int* lda, fp* x, + const int* incx); template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const float *a, const int *lda, float *x, const int *incx) { + const int* n, const float* a, const int* lda, float* x, const int* incx) { cblas_strsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const double *a, const int *lda, double *x, const int *incx) { + const int* n, const double* a, const int* lda, double* x, const int* incx) { cblas_dtrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, a, *lda, x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ctrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, std::complex* x, + const int* incx) { + cblas_ctrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } template <> void trsv(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, - const int *n, const std::complex *a, const int *lda, std::complex *x, - const int *incx) { - cblas_ztrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void *)a, *lda, (void *)x, + const int* n, const std::complex* a, const int* lda, 
std::complex* x, + const int* incx) { + cblas_ztrsv_wrapper(layout, upper_lower, trans, unit_diag, *n, (const void*)a, *lda, (void*)x, *incx); } /* Level 1 */ template -static fp_res asum(const int *n, const fp_data *x, const int *incx); +static fp_res asum(const int* n, const fp_data* x, const int* incx); template <> -float asum(const int *n, const float *x, const int *incx) { +float asum(const int* n, const float* x, const int* incx) { return cblas_sasum_wrapper(*n, x, *incx); } template <> -double asum(const int *n, const double *x, const int *incx) { +double asum(const int* n, const double* x, const int* incx) { return cblas_dasum_wrapper(*n, x, *incx); } template <> -float asum(const int *n, const std::complex *x, const int *incx) { - return cblas_scasum_wrapper(*n, (const void *)x, *incx); +float asum(const int* n, const std::complex* x, const int* incx) { + return cblas_scasum_wrapper(*n, (const void*)x, *incx); } template <> -double asum(const int *n, const std::complex *x, const int *incx) { - return cblas_dzasum_wrapper(*n, (const void *)x, *incx); +double asum(const int* n, const std::complex* x, const int* incx) { + return cblas_dzasum_wrapper(*n, (const void*)x, *incx); } template -static void axpy(const int *n, const fp *alpha, const fp *x, const int *incx, fp *y, - const int *incy); +static void axpy(const int* n, const fp* alpha, const fp* x, const int* incx, fp* y, + const int* incy); template <> -void axpy(const int *n, const float *alpha, const float *x, const int *incx, float *y, - const int *incy) { +void axpy(const int* n, const float* alpha, const float* x, const int* incx, float* y, + const int* incy) { cblas_saxpy_wrapper(*n, *alpha, x, *incx, y, *incy); } template <> -void axpy(const int *n, const double *alpha, const double *x, const int *incx, double *y, - const int *incy) { +void axpy(const int* n, const double* alpha, const double* x, const int* incx, double* y, + const int* incy) { cblas_daxpy_wrapper(*n, *alpha, x, *incx, y, *incy); 
} template <> -void axpy(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, std::complex *y, const int *incy) { - cblas_caxpy_wrapper(*n, (const void *)alpha, (const void *)x, *incx, (void *)y, *incy); +void axpy(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, std::complex* y, const int* incy) { + cblas_caxpy_wrapper(*n, (const void*)alpha, (const void*)x, *incx, (void*)y, *incy); } template <> -void axpy(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, std::complex *y, const int *incy) { - cblas_zaxpy_wrapper(*n, (const void *)alpha, (const void *)x, *incx, (void *)y, *incy); +void axpy(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, std::complex* y, const int* incy) { + cblas_zaxpy_wrapper(*n, (const void*)alpha, (const void*)x, *incx, (void*)y, *incy); } template -static void copy(const int *n, const fp *x, const int *incx, fp *y, const int *incy); +static void copy(const int* n, const fp* x, const int* incx, fp* y, const int* incy); template <> -void copy(const int *n, const float *x, const int *incx, float *y, const int *incy) { +void copy(const int* n, const float* x, const int* incx, float* y, const int* incy) { cblas_scopy_wrapper(*n, x, *incx, y, *incy); } template <> -void copy(const int *n, const double *x, const int *incx, double *y, const int *incy) { +void copy(const int* n, const double* x, const int* incx, double* y, const int* incy) { cblas_dcopy_wrapper(*n, x, *incx, y, *incy); } template <> -void copy(const int *n, const std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_ccopy_wrapper(*n, (const void *)x, *incx, (void *)y, *incy); +void copy(const int* n, const std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_ccopy_wrapper(*n, (const void*)x, *incx, (void*)y, *incy); } template <> -void copy(const int *n, const std::complex *x, const int *incx, 
std::complex *y, - const int *incy) { - cblas_zcopy_wrapper(*n, (const void *)x, *incx, (void *)y, *incy); +void copy(const int* n, const std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_zcopy_wrapper(*n, (const void*)x, *incx, (void*)y, *incy); } template -static fp_res dot(const int *n, const fp *x, const int *incx, const fp *y, const int *incy); +static fp_res dot(const int* n, const fp* x, const int* incx, const fp* y, const int* incy); template <> -float dot(const int *n, const float *x, const int *incx, const float *y, const int *incy) { +float dot(const int* n, const float* x, const int* incx, const float* y, const int* incy) { return cblas_sdot_wrapper(*n, x, *incx, y, *incy); } template <> -double dot(const int *n, const double *x, const int *incx, const double *y, const int *incy) { +double dot(const int* n, const double* x, const int* incx, const double* y, const int* incy) { return cblas_ddot_wrapper(*n, x, *incx, y, *incy); } template <> -double dot(const int *n, const float *x, const int *incx, const float *y, const int *incy) { +double dot(const int* n, const float* x, const int* incx, const float* y, const int* incy) { return cblas_dsdot_wrapper(*n, x, *incx, y, *incy); } -static float sdsdot(const int *n, const float *sb, const float *x, const int *incx, const float *y, - const int *incy) { +static float sdsdot(const int* n, const float* sb, const float* x, const int* incx, const float* y, + const int* incy) { return cblas_sdsdot_wrapper(*n, *sb, x, *incx, y, *incy); } template -static fp_res nrm2(const int *n, const fp *x, const int *incx); +static fp_res nrm2(const int* n, const fp* x, const int* incx); template <> -float nrm2(const int *n, const float *x, const int *incx) { +float nrm2(const int* n, const float* x, const int* incx) { return cblas_snrm2_wrapper(*n, x, *incx); } template <> -double nrm2(const int *n, const double *x, const int *incx) { +double nrm2(const int* n, const double* x, const int* incx) { 
return cblas_dnrm2_wrapper(*n, x, *incx); } template <> -float nrm2(const int *n, const std::complex *x, const int *incx) { - return cblas_scnrm2_wrapper(*n, (const void *)x, *incx); +float nrm2(const int* n, const std::complex* x, const int* incx) { + return cblas_scnrm2_wrapper(*n, (const void*)x, *incx); } template <> -double nrm2(const int *n, const std::complex *x, const int *incx) { - return cblas_dznrm2_wrapper(*n, (const void *)x, *incx); +double nrm2(const int* n, const std::complex* x, const int* incx) { + return cblas_dznrm2_wrapper(*n, (const void*)x, *incx); } template -static void rot(const int *n, fp *x, const int *incx, fp *y, const int *incy, const fp_scalar *c, - const fp_scalar *s); +static void rot(const int* n, fp* x, const int* incx, fp* y, const int* incy, const fp_scalar* c, + const fp_scalar* s); template <> -void rot(const int *n, float *x, const int *incx, float *y, const int *incy, const float *c, - const float *s) { +void rot(const int* n, float* x, const int* incx, float* y, const int* incy, const float* c, + const float* s) { cblas_srot_wrapper(*n, x, *incx, y, *incy, *c, *s); } template <> -void rot(const int *n, double *x, const int *incx, double *y, const int *incy, const double *c, - const double *s) { +void rot(const int* n, double* x, const int* incx, double* y, const int* incy, const double* c, + const double* s) { cblas_drot_wrapper(*n, x, *incx, y, *incy, *c, *s); } template <> -void rot(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy, const float *c, const float *s) { - csrot_wrapper(n, (void *)x, incx, (void *)y, incy, c, s); +void rot(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy, const float* c, const float* s) { + csrot_wrapper(n, (void*)x, incx, (void*)y, incy, c, s); } template <> -void rot(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy, const double *c, const double *s) { - zdrot_wrapper(n, (void *)x, incx, 
(void *)y, incy, c, s); +void rot(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy, const double* c, const double* s) { + zdrot_wrapper(n, (void*)x, incx, (void*)y, incy, c, s); } template -static void rotg(fp *a, fp *b, fp_c *c, fp *s); +static void rotg(fp* a, fp* b, fp_c* c, fp* s); template <> -void rotg(float *a, float *b, float *c, float *s) { +void rotg(float* a, float* b, float* c, float* s) { cblas_srotg_wrapper(a, b, c, s); } template <> -void rotg(double *a, double *b, double *c, double *s) { +void rotg(double* a, double* b, double* c, double* s) { cblas_drotg_wrapper(a, b, c, s); } template <> -void rotg(std::complex *a, std::complex *b, float *c, std::complex *s) { - crotg_wrapper((void *)a, (void *)b, c, (void *)s); +void rotg(std::complex* a, std::complex* b, float* c, std::complex* s) { + crotg_wrapper((void*)a, (void*)b, c, (void*)s); } template <> -void rotg(std::complex *a, std::complex *b, double *c, std::complex *s) { - zrotg_wrapper((void *)a, (void *)b, c, (void *)s); +void rotg(std::complex* a, std::complex* b, double* c, std::complex* s) { + zrotg_wrapper((void*)a, (void*)b, c, (void*)s); } template -static void rotm(const int *n, fp *x, const int *incx, fp *y, const int *incy, const fp *param); +static void rotm(const int* n, fp* x, const int* incx, fp* y, const int* incy, const fp* param); template <> -void rotm(const int *n, float *x, const int *incx, float *y, const int *incy, const float *param) { +void rotm(const int* n, float* x, const int* incx, float* y, const int* incy, const float* param) { cblas_srotm_wrapper(*n, x, *incx, y, *incy, param); } template <> -void rotm(const int *n, double *x, const int *incx, double *y, const int *incy, - const double *param) { +void rotm(const int* n, double* x, const int* incx, double* y, const int* incy, + const double* param) { cblas_drotm_wrapper(*n, x, *incx, y, *incy, param); } template -static void rotmg(fp *d1, fp *d2, fp *x1, fp *y1, fp *param); +static 
void rotmg(fp* d1, fp* d2, fp* x1, fp* y1, fp* param); template <> -void rotmg(float *d1, float *d2, float *x1, float *y1, float *param) { +void rotmg(float* d1, float* d2, float* x1, float* y1, float* param) { cblas_srotmg_wrapper(d1, d2, x1, *y1, param); } template <> -void rotmg(double *d1, double *d2, double *x1, double *y1, double *param) { +void rotmg(double* d1, double* d2, double* x1, double* y1, double* param) { cblas_drotmg_wrapper(d1, d2, x1, *y1, param); } template -static void scal(const int *n, const fp_scalar *alpha, fp_data *x, const int *incx); +static void scal(const int* n, const fp_scalar* alpha, fp_data* x, const int* incx); template <> -void scal(const int *n, const float *alpha, float *x, const int *incx) { +void scal(const int* n, const float* alpha, float* x, const int* incx) { cblas_sscal_wrapper(*n, *alpha, x, *incx); } template <> -void scal(const int *n, const double *alpha, double *x, const int *incx) { +void scal(const int* n, const double* alpha, double* x, const int* incx) { cblas_dscal_wrapper(*n, *alpha, x, *incx); } template <> -void scal(const int *n, const std::complex *alpha, std::complex *x, const int *incx) { - cblas_cscal_wrapper(*n, (const void *)alpha, (void *)x, *incx); +void scal(const int* n, const std::complex* alpha, std::complex* x, const int* incx) { + cblas_cscal_wrapper(*n, (const void*)alpha, (void*)x, *incx); } template <> -void scal(const int *n, const std::complex *alpha, std::complex *x, - const int *incx) { - cblas_zscal_wrapper(*n, (const void *)alpha, (void *)x, *incx); +void scal(const int* n, const std::complex* alpha, std::complex* x, + const int* incx) { + cblas_zscal_wrapper(*n, (const void*)alpha, (void*)x, *incx); } template <> -void scal(const int *n, const float *alpha, std::complex *x, const int *incx) { - cblas_csscal_wrapper(*n, *alpha, (void *)x, *incx); +void scal(const int* n, const float* alpha, std::complex* x, const int* incx) { + cblas_csscal_wrapper(*n, *alpha, (void*)x, *incx); } 
template <> -void scal(const int *n, const double *alpha, std::complex *x, const int *incx) { - cblas_zdscal_wrapper(*n, *alpha, (void *)x, *incx); +void scal(const int* n, const double* alpha, std::complex* x, const int* incx) { + cblas_zdscal_wrapper(*n, *alpha, (void*)x, *incx); } template -static void swap(const int *n, fp *x, const int *incx, fp *y, const int *incy); +static void swap(const int* n, fp* x, const int* incx, fp* y, const int* incy); template <> -void swap(const int *n, float *x, const int *incx, float *y, const int *incy) { +void swap(const int* n, float* x, const int* incx, float* y, const int* incy) { cblas_sswap_wrapper(*n, x, *incx, y, *incy); } template <> -void swap(const int *n, double *x, const int *incx, double *y, const int *incy) { +void swap(const int* n, double* x, const int* incx, double* y, const int* incy) { cblas_dswap_wrapper(*n, x, *incx, y, *incy); } template <> -void swap(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_cswap_wrapper(*n, (void *)x, *incx, (void *)y, *incy); +void swap(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_cswap_wrapper(*n, (void*)x, *incx, (void*)y, *incy); } template <> -void swap(const int *n, std::complex *x, const int *incx, std::complex *y, - const int *incy) { - cblas_zswap_wrapper(*n, (void *)x, *incx, (void *)y, *incy); +void swap(const int* n, std::complex* x, const int* incx, std::complex* y, + const int* incy) { + cblas_zswap_wrapper(*n, (void*)x, *incx, (void*)y, *incy); } template -static void dotc(fp *pres, const int *n, const fp *x, const int *incx, const fp *y, - const int *incy); +static void dotc(fp* pres, const int* n, const fp* x, const int* incx, const fp* y, + const int* incy); template <> -void dotc(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_cdotc_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, 
*incy, (void *)pres); +void dotc(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_cdotc_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template <> -void dotc(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_zdotc_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotc(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_zdotc_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template -static void dotu(fp *pres, const int *n, const fp *x, const int *incx, const fp *y, - const int *incy); +static void dotu(fp* pres, const int* n, const fp* x, const int* incx, const fp* y, + const int* incy); template <> -void dotu(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_cdotu_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotu(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_cdotu_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template <> -void dotu(std::complex *pres, const int *n, const std::complex *x, const int *incx, - const std::complex *y, const int *incy) { - cblas_zdotu_sub_wrapper(*n, (const void *)x, *incx, (const void *)y, *incy, (void *)pres); +void dotu(std::complex* pres, const int* n, const std::complex* x, const int* incx, + const std::complex* y, const int* incy) { + cblas_zdotu_sub_wrapper(*n, (const void*)x, *incx, (const void*)y, *incy, (void*)pres); } template -static int iamax(const int *n, const fp *x, const int *incx); +static int iamax(const int* n, const fp* x, const int* incx); template <> -int iamax(const int *n, 
const float *x, const int *incx) { +int iamax(const int* n, const float* x, const int* incx) { return cblas_isamax_wrapper(*n, x, *incx); } template <> -int iamax(const int *n, const double *x, const int *incx) { +int iamax(const int* n, const double* x, const int* incx) { return cblas_idamax_wrapper(*n, x, *incx); } template <> -int iamax(const int *n, const std::complex *x, const int *incx) { - return cblas_icamax_wrapper(*n, (const void *)x, *incx); +int iamax(const int* n, const std::complex* x, const int* incx) { + return cblas_icamax_wrapper(*n, (const void*)x, *incx); } template <> -int iamax(const int *n, const std::complex *x, const int *incx) { - return cblas_izamax_wrapper(*n, (const void *)x, *incx); +int iamax(const int* n, const std::complex* x, const int* incx) { + return cblas_izamax_wrapper(*n, (const void*)x, *incx); } inline float abs_val(float val) { @@ -1425,10 +1421,10 @@ inline double abs_val(std::complex val) { } template -static int iamin(const int *n, const fp *x, const int *incx); +static int iamin(const int* n, const fp* x, const int* incx); template <> -int iamin(const int *n, const float *x, const int *incx) { +int iamin(const int* n, const float* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1451,7 +1447,7 @@ int iamin(const int *n, const float *x, const int *incx) { } template <> -int iamin(const int *n, const double *x, const int *incx) { +int iamin(const int* n, const double* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1474,7 +1470,7 @@ int iamin(const int *n, const double *x, const int *incx) { } template <> -int iamin(const int *n, const std::complex *x, const int *incx) { +int iamin(const int* n, const std::complex* x, const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1497,7 +1493,7 @@ int iamin(const int *n, const std::complex *x, const int *incx) { } template <> -int iamin(const int *n, const std::complex *x, const int *incx) { +int iamin(const int* n, const std::complex* x, 
const int* incx) { if (*n < 1 || *incx < 1) { return 0; } @@ -1522,12 +1518,12 @@ int iamin(const int *n, const std::complex *x, const int *incx) { /* Extensions */ template -static void axpby(const int *n, const fp *alpha, const fp *x, const int *incx, const fp *beta, - fp *y, const int *incy); +static void axpby(const int* n, const fp* alpha, const fp* x, const int* incx, const fp* beta, + fp* y, const int* incy); template <> -void axpby(const int *n, const float *alpha, const float *x, const int *incx, const float *beta, - float *y, const int *incy) { +void axpby(const int* n, const float* alpha, const float* x, const int* incx, const float* beta, + float* y, const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1536,8 +1532,8 @@ void axpby(const int *n, const float *alpha, const float *x, const int *incx, co } template <> -void axpby(const int *n, const double *alpha, const double *x, const int *incx, const double *beta, - double *y, const int *incy) { +void axpby(const int* n, const double* alpha, const double* x, const int* incx, const double* beta, + double* y, const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1546,9 +1542,9 @@ void axpby(const int *n, const double *alpha, const double *x, const int *incx, } template <> -void axpby(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, const std::complex *beta, std::complex *y, - const int *incy) { +void axpby(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, const std::complex* beta, std::complex* y, + const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 
0 : (1 - *n) * (*incy); @@ -1557,9 +1553,9 @@ void axpby(const int *n, const std::complex *alpha, const std::complex -void axpby(const int *n, const std::complex *alpha, const std::complex *x, - const int *incx, const std::complex *beta, std::complex *y, - const int *incy) { +void axpby(const int* n, const std::complex* alpha, const std::complex* x, + const int* incx, const std::complex* beta, std::complex* y, + const int* incy) { // Not supported in NETLIB. Reference C++ implementation is used. int idx = (*incx) > 0 ? 0 : (1 - *n) * (*incx); int idy = (*incy) > 0 ? 0 : (1 - *n) * (*incy); @@ -1569,16 +1565,16 @@ void axpby(const int *n, const std::complex *alpha, const std::complex static void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, - const fps *alpha, const fpa *a, const int *lda, const fpa *ao, const fpb *b, - const int *ldb, const fpb *bo, const fps *beta, fpc *c, const int *ldc, - const fpc *co); + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, + const fps* alpha, const fpa* a, const int* lda, const fpa* ao, const fpb* b, + const int* ldb, const fpb* bo, const fps* beta, fpc* c, const int* ldc, + const fpc* co); template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const int8_t *a, const int *lda, const int8_t *ao, const int8_t *b, const int *ldb, - const int8_t *bo, const float *beta, int32_t *c, const int *ldc, const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const int8_t* a, const int* lda, const int8_t* ao, const int8_t* b, const int* ldb, + const int8_t* bo, const float* beta, int32_t* c, const int* ldc, const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. 
int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1591,9 +1587,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? *ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1611,10 +1607,10 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const int8_t *a, const int *lda, const int8_t *ao, const uint8_t *b, const int *ldb, - const uint8_t *bo, const float *beta, int32_t *c, const int *ldc, - const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const int8_t* a, const int* lda, const int8_t* ao, const uint8_t* b, const int* ldb, + const uint8_t* bo, const float* beta, int32_t* c, const int* ldc, + const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1627,9 +1623,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1647,9 +1643,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const uint8_t *a, const int *lda, const uint8_t *ao, const int8_t *b, const int *ldb, - const int8_t *bo, const float *beta, int32_t *c, const int *ldc, const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const uint8_t* a, const int* lda, const uint8_t* ao, const int8_t* b, const int* ldb, + const int8_t* bo, const float* beta, int32_t* c, const int* ldc, const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1662,9 +1658,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1682,10 +1678,10 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template <> void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - CBLAS_OFFSET offsetc, const int *m, const int *n, const int *k, const float *alpha, - const uint8_t *a, const int *lda, const uint8_t *ao, const uint8_t *b, - const int *ldb, const uint8_t *bo, const float *beta, int32_t *c, const int *ldc, - const int32_t *co) { + CBLAS_OFFSET offsetc, const int* m, const int* n, const int* k, const float* alpha, + const uint8_t* a, const int* lda, const uint8_t* ao, const uint8_t* b, + const int* ldb, const uint8_t* bo, const float* beta, int32_t* c, const int* ldc, + const int32_t* co) { // Not supported in NETLIB. DGEMM is used as reference. int sizea, sizeb, sizec; if (layout == CblasColMajor) { @@ -1698,9 +1694,9 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran sizeb = (transb == CblasNoTrans) ? 
*ldb * *k : *ldb * *n; sizec = *ldc * *m; } - double *ad = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); - double *bd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); - double *cd = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); + double* ad = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizea); + double* bd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizeb); + double* cd = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); double alphad = *alpha; double betad = *beta; double aod = *ao; @@ -1718,19 +1714,19 @@ void gemm_bias(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE tran template static void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const fp *alpha, const fp *a, - const int *lda, const fp *b, const int *ldb, const fp *beta, fp *c, - const int *ldc); + CBLAS_TRANSPOSE transb, const int* n, const int* k, const fp* alpha, const fp* a, + const int* lda, const fp* b, const int* ldb, const fp* beta, fp* c, + const int* ldc); template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const float *alpha, const float *a, - const int *lda, const float *b, const int *ldb, const float *beta, float *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const float* alpha, const float* a, + const int* lda, const float* b, const int* ldb, const float* beta, float* c, + const int* ldc) { // Not supported in NETLIB. SGEMM is used as reference. 
int sizec; sizec = *ldc * *n; - float *cf = (float *)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec); + float* cf = (float*)oneapi::mkl::aligned_alloc(64, sizeof(float) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_sgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf, *ldc); @@ -1740,13 +1736,13 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const double *alpha, const double *a, - const int *lda, const double *b, const int *ldb, const double *beta, double *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const double* alpha, const double* a, + const int* lda, const double* b, const int* ldb, const double* beta, double* c, + const int* ldc) { // Not supported in NETLIB. DGEMM is used as reference. int sizec; sizec = *ldc * *n; - double *cf = (double *)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); + double* cf = (double*)oneapi::mkl::aligned_alloc(64, sizeof(double) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_dgemm_wrapper(layout, transa, transb, *n, *n, *k, *alpha, a, *lda, b, *ldb, *beta, cf, *ldc); @@ -1756,15 +1752,15 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { // Not supported in NETLIB. 
CGEMM is used as reference. int sizec; sizec = *ldc * *n; - std::complex *cf = - (std::complex *)oneapi::mkl::aligned_alloc(64, sizeof(std::complex) * sizec); + std::complex* cf = + (std::complex*)oneapi::mkl::aligned_alloc(64, sizeof(std::complex) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_cgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf, *ldc); @@ -1774,15 +1770,15 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, template <> void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, - CBLAS_TRANSPOSE transb, const int *n, const int *k, const std::complex *alpha, - const std::complex *a, const int *lda, const std::complex *b, - const int *ldb, const std::complex *beta, std::complex *c, - const int *ldc) { + CBLAS_TRANSPOSE transb, const int* n, const int* k, const std::complex* alpha, + const std::complex* a, const int* lda, const std::complex* b, + const int* ldb, const std::complex* beta, std::complex* c, + const int* ldc) { // Not supported in NETLIB. ZGEMM is used as reference. 
int sizec; sizec = *ldc * *n; - std::complex *cf = (std::complex *)oneapi::mkl::aligned_alloc( - 64, sizeof(std::complex) * sizec); + std::complex* cf = + (std::complex*)oneapi::mkl::aligned_alloc(64, sizeof(std::complex) * sizec); update_c(c, layout, upper_lower, *n, *n, *ldc, cf); cblas_zgemm_wrapper(layout, transa, transb, *n, *n, *k, alpha, a, *lda, b, *ldb, beta, cf, *ldc); @@ -1791,12 +1787,12 @@ void gemmt(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE transa, } template -static void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const fp *a, const int *lda, const fp *x, const int *incx, fp *c, const int *ldc); +static void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const fp* a, const int* lda, const fp* x, const int* incx, fp* c, const int* ldc); template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, const float *a, - const int *lda, const float *x, const int *incx, float *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, const float* a, + const int* lda, const float* x, const int* incx, float* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. float tmp; int size_x = (left_right == CblasLeft) ? *m : *n; @@ -1827,8 +1823,8 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, const double *a, - const int *lda, const double *x, const int *incx, double *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, const double* a, + const int* lda, const double* x, const int* incx, double* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. double tmp; int size_x = (left_right == CblasLeft) ? 
*m : *n; @@ -1859,9 +1855,9 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const std::complex *a, const int *lda, const std::complex *x, - const int *incx, std::complex *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const std::complex* a, const int* lda, const std::complex* x, + const int* incx, std::complex* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. std::complex tmp; int size_x = (left_right == CblasLeft) ? *m : *n; @@ -1912,9 +1908,9 @@ void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n } template <> -void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int *m, const int *n, - const std::complex *a, const int *lda, const std::complex *x, - const int *incx, std::complex *c, const int *ldc) { +void dgmm(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, const int* m, const int* n, + const std::complex* a, const int* lda, const std::complex* x, + const int* incx, std::complex* c, const int* ldc) { // Not supported in NETLIB. Reference C++ implementation is used. std::complex tmp; int size_x = (left_right == CblasLeft) ? 
*m : *n; @@ -1979,7 +1975,7 @@ fp sametype_conj(fp x) { template void omatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int64_t m, int64_t n, - fp alpha, fp *A, int64_t lda, fp *B, int64_t ldb) { + fp alpha, fp* A, int64_t lda, fp* B, int64_t ldb) { int64_t logical_m, logical_n; if (layout == oneapi::mkl::layout::col_major) { logical_m = m; @@ -2014,9 +2010,9 @@ void omatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6 } template -void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, const int64_t &m, - const int64_t &n, const fp &alpha, const fp *in_matrix, const int64_t &ld_in, - const int64_t &inc_in, fp *out_matrix, const int64_t &ld_out, +void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, const int64_t& m, + const int64_t& n, const fp& alpha, const fp* in_matrix, const int64_t& ld_in, + const int64_t& inc_in, fp* out_matrix, const int64_t& ld_out, const int64_t inc_out) { int64_t logical_m, logical_n; if (layout == oneapi::mkl::layout::col_major) { @@ -2061,7 +2057,7 @@ void omatcopy2_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, con template void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int64_t m, int64_t n, - fp alpha, fp *A, int64_t lda, int64_t ldb) { + fp alpha, fp* A, int64_t lda, int64_t ldb) { int64_t logical_m, logical_n; if (layout == oneapi::mkl::layout::col_major) { logical_m = m; @@ -2115,8 +2111,8 @@ void imatcopy_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int6 template void omatadd_ref(oneapi::mkl::layout layout, oneapi::mkl::transpose transa, - oneapi::mkl::transpose transb, int64_t m, int64_t n, fp alpha, fp *A, int64_t lda, - fp beta, fp *B, int64_t ldb, fp *C, int64_t ldc) { + oneapi::mkl::transpose transb, int64_t m, int64_t n, fp alpha, fp* A, int64_t lda, + fp beta, fp* B, int64_t ldb, fp* C, int64_t ldc) { int64_t logical_m, logical_n; if (layout == oneapi::mkl::layout::col_major) { 
logical_m = m; diff --git a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp index 977d14ca4..bac1a76a8 100644 --- a/tests/unit_tests/blas/include/reference_blas_wrappers.hpp +++ b/tests/unit_tests/blas/include/reference_blas_wrappers.hpp @@ -27,7 +27,7 @@ #ifdef __linux__ #include -#define LIB_TYPE void * +#define LIB_TYPE void* #define GET_LIB_HANDLE(libname) dlopen((libname), RTLD_LAZY | RTLD_LOCAL | RTLD_DEEPBIND) #define GET_FUNC(lib, fn) dlsym(lib, (fn)) #elif defined(_WIN64) @@ -68,129 +68,129 @@ static LIB_TYPE cblas_library() { static void (*cblas_sgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const float alpha, - const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc); + const float* a, const int lda, const float* b, const int ldb, + const float beta, float* c, const int ldc); static void (*cblas_dgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const double alpha, - const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc); + const double* a, const int lda, const double* b, const int ldb, + const double beta, double* c, const int ldc); static void (*cblas_cgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc); + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc); static void (*cblas_zgemm_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, 
const void *b, const int ldb, - const void *beta, void *c, const int ldc); + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc); static void (*cblas_ssymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc); + const int m, const int n, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc); static void (*cblas_dsymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc); + const int m, const int n, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc); static void (*cblas_csymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_zsymm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_ssyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float 
beta, float *c, const int ldc); + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc); static void (*cblas_dsyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, const int ldc); + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc); static void (*cblas_csyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc); static void (*cblas_zsyrk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc); static void (*cblas_chemm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_zhemm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc); static void (*cblas_cherk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, 
CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, const int ldc); + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc); static void (*cblas_zherk_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, const int ldc); + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc); static void (*cblas_ssyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc); + const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc); static void (*cblas_dsyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc); + const int n, const int k, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc); static void (*cblas_csyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc); static void (*cblas_zsyr2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const 
void *b, const int ldb, const void *beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc); static void (*cblas_cher2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const float beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const float beta, + void* c, const int ldc); static void (*cblas_zher2k_p)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const double beta, - void *c, const int ldc); + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const double beta, + void* c, const int ldc); static void (*cblas_strmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb); static void (*cblas_dtrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb); static void (*cblas_ctrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_ztrmm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE 
transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_strsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb); static void (*cblas_dtrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb); static void (*cblas_ctrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void (*cblas_ztrsm_p)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb); static void cblas_sgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const float alpha, - const float *a, const int lda, const float *b, const int ldb, - const float beta, float *c, const int ldc) { + const float* a, const int lda, const float* b, const int ldb, + const float beta, float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_sgemm_p == NULL) cblas_sgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const float alpha, const float *a, - const int lda, const float *b, const 
int ldb, const float beta, float *c, + const int m, const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_sgemm"); if (cblas_sgemm_p != NULL) cblas_sgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -199,14 +199,14 @@ static void cblas_sgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL static void cblas_dgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, const double alpha, - const double *a, const int lda, const double *b, const int ldb, - const double beta, double *c, const int ldc) { + const double* a, const int lda, const double* b, const int ldb, + const double beta, double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dgemm_p == NULL) cblas_dgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, const int m, const int n, const int k, - const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dgemm"); if (cblas_dgemm_p != NULL) cblas_dgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -214,15 +214,15 @@ static void cblas_dgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_cgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc) { + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc) { if (cblas_library() 
!= NULL) { if (cblas_cgemm_p == NULL) cblas_cgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cgemm"); if (cblas_cgemm_p != NULL) cblas_cgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -230,15 +230,15 @@ static void cblas_cgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_zgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, - const void *a, const int lda, const void *b, const int ldb, - const void *beta, void *c, const int ldc) { + const int m, const int n, const int k, const void* alpha, + const void* a, const int lda, const void* b, const int ldb, + const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zgemm_p == NULL) cblas_zgemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBLAS_TRANSPOSE transb, - const int m, const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, void *c, + const int m, const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zgemm"); if (cblas_zgemm_p != NULL) cblas_zgemm_p(layout, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -246,15 +246,15 @@ static void cblas_zgemm_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE transa, CBL } static void cblas_ssymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const float alpha, 
const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc) { + const int m, const int n, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssymm_p == NULL) cblas_ssymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *b, const int ldb, const float beta, float *c, + const int n, const float alpha, const float* a, const int lda, + const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssymm"); if (cblas_ssymm_p != NULL) cblas_ssymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); @@ -262,15 +262,15 @@ static void cblas_ssymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLA } static void cblas_dsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc) { + const int m, const int n, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsymm_p == NULL) cblas_dsymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, const int m, - const int n, const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const int n, const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dsymm"); if (cblas_dsymm_p != NULL) cblas_dsymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); @@ -278,43 +278,43 @@ static void 
cblas_dsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLA } static void cblas_csymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csymm_p == NULL) cblas_csymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_csymm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csymm"); if (cblas_csymm_p != NULL) cblas_csymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_zsymm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsymm_p == NULL) cblas_zsymm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsymm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int 
ldc))GET_FUNC(h_libcblas, "cblas_zsymm"); if (cblas_zsymm_p != NULL) cblas_zsymm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_ssyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float beta, float *c, const int ldc) { + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssyrk_p == NULL) cblas_ssyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float beta, float *c, + const int n, const int k, const float alpha, const float* a, + const int lda, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssyrk"); if (cblas_ssyrk_p != NULL) cblas_ssyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -322,13 +322,13 @@ static void cblas_ssyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_dsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, const int ldc) { + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsyrk_p == NULL) cblas_dsyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double beta, double *c, + const int n, const int k, const double alpha, const double* a, + const int lda, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dsyrk"); if (cblas_dsyrk_p != NULL) cblas_dsyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -336,13 
+336,13 @@ static void cblas_dsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_csyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csyrk_p == NULL) cblas_csyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csyrk"); if (cblas_csyrk_p != NULL) cblas_csyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -350,13 +350,13 @@ static void cblas_csyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_zsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsyrk_p == NULL) cblas_zsyrk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *beta, void *c, + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsyrk"); if (cblas_zsyrk_p != NULL) cblas_zsyrk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -364,43 +364,43 @@ static void cblas_zsyrk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } 
static void cblas_chemm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_chemm_p == NULL) cblas_chemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_chemm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_chemm"); if (cblas_chemm_p != NULL) cblas_chemm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_zhemm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zhemm_p == NULL) cblas_zhemm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE left_right, CBLAS_UPLO uplo, - const int m, const int n, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc))GET_FUNC(h_libcblas, "cblas_zhemm"); + const int m, const int n, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zhemm"); if (cblas_zhemm_p != NULL) 
cblas_zhemm_p(layout, left_right, uplo, m, n, alpha, a, lda, b, ldb, beta, c, ldc); } } static void cblas_cherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, const int ldc) { + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_cherk_p == NULL) cblas_cherk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const void *a, - const int lda, const float beta, void *c, + const int n, const int k, const float alpha, const void* a, + const int lda, const float beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cherk"); if (cblas_cherk_p != NULL) cblas_cherk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -408,13 +408,13 @@ static void cblas_cherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } static void cblas_zherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, const int ldc) { + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zherk_p == NULL) cblas_zherk_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const void *a, - const int lda, const double beta, void *c, + const int n, const int k, const double alpha, const void* a, + const int lda, const double beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zherk"); if (cblas_zherk_p != NULL) cblas_zherk_p(layout, uplo, trans, n, k, alpha, a, lda, beta, c, ldc); @@ -422,15 +422,15 @@ static void cblas_zherk_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRAN } 
static void cblas_ssyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const float alpha, const float *a, - const int lda, const float *b, const int ldb, const float beta, - float *c, const int ldc) { + const int n, const int k, const float alpha, const float* a, + const int lda, const float* b, const int ldb, const float beta, + float* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_ssyr2k_p == NULL) cblas_ssyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *b, const int ldb, const float beta, float *c, + const int k, const float alpha, const float* a, const int lda, + const float* b, const int ldb, const float beta, float* c, const int ldc))GET_FUNC(h_libcblas, "cblas_ssyr2k"); if (cblas_ssyr2k_p != NULL) cblas_ssyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -438,15 +438,15 @@ static void cblas_ssyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_dsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const double alpha, const double *a, - const int lda, const double *b, const int ldb, const double beta, - double *c, const int ldc) { + const int n, const int k, const double alpha, const double* a, + const int lda, const double* b, const int ldb, const double beta, + double* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_dsyr2k_p == NULL) cblas_dsyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const double alpha, const double *a, const int lda, - const double *b, const int ldb, const double beta, double *c, + const int k, const double alpha, const double* a, const int lda, + const double* b, const int ldb, const double beta, double* c, const int ldc))GET_FUNC(h_libcblas, "cblas_dsyr2k"); if 
(cblas_dsyr2k_p != NULL) cblas_dsyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -454,15 +454,15 @@ static void cblas_dsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_csyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_csyr2k_p == NULL) cblas_csyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const void *beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_csyr2k"); if (cblas_csyr2k_p != NULL) cblas_csyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -470,15 +470,15 @@ static void cblas_csyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_zsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const void *beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const void* beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zsyr2k_p == NULL) cblas_zsyr2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const void *beta, void *c, + const int k, const void* 
alpha, const void* a, const int lda, + const void* b, const int ldb, const void* beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zsyr2k"); if (cblas_zsyr2k_p != NULL) cblas_zsyr2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -486,15 +486,15 @@ static void cblas_zsyr2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_cher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const float beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const float beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_cher2k_p == NULL) cblas_cher2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const float beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const float beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_cher2k"); if (cblas_cher2k_p != NULL) cblas_cher2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -502,15 +502,15 @@ static void cblas_cher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA } static void cblas_zher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, - const int n, const int k, const void *alpha, const void *a, - const int lda, const void *b, const int ldb, const double beta, - void *c, const int ldc) { + const int n, const int k, const void* alpha, const void* a, + const int lda, const void* b, const int ldb, const double beta, + void* c, const int ldc) { if (cblas_library() != NULL) { if (cblas_zher2k_p == NULL) cblas_zher2k_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRANSPOSE trans, const 
int n, - const int k, const void *alpha, const void *a, const int lda, - const void *b, const int ldb, const double beta, void *c, + const int k, const void* alpha, const void* a, const int lda, + const void* b, const int ldb, const double beta, void* c, const int ldc))GET_FUNC(h_libcblas, "cblas_zher2k"); if (cblas_zher2k_p != NULL) cblas_zher2k_p(layout, uplo, trans, n, k, alpha, a, lda, b, ldb, beta, c, ldc); @@ -519,14 +519,14 @@ static void cblas_zher2k_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO uplo, CBLAS_TRA static void cblas_strmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_strmm_p == NULL) cblas_strmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const float alpha, const float *a, const int lda, - float *b, const int ldb))GET_FUNC(h_libcblas, "cblas_strmm"); + const int n, const float alpha, const float* a, const int lda, + float* b, const int ldb))GET_FUNC(h_libcblas, "cblas_strmm"); if (cblas_strmm_p != NULL) cblas_strmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -534,14 +534,14 @@ static void cblas_strmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_dtrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_dtrmm_p == NULL) cblas_dtrmm_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int m, const int n, const double 
alpha, const double *a, - const int lda, double *b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrmm"); + CBLAS_DIAG diag, const int m, const int n, const double alpha, const double* a, + const int lda, double* b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrmm"); if (cblas_dtrmm_p != NULL) cblas_dtrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -549,14 +549,14 @@ static void cblas_dtrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ctrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ctrmm_p == NULL) cblas_ctrmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrmm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrmm"); if (cblas_ctrmm_p != NULL) cblas_ctrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -564,14 +564,14 @@ static void cblas_ctrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ztrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ztrmm_p == NULL) cblas_ztrmm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int 
ldb))GET_FUNC(h_libcblas, "cblas_ztrmm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrmm"); if (cblas_ztrmm_p != NULL) cblas_ztrmm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -579,14 +579,14 @@ static void cblas_ztrmm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_strsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const float alpha, const float *a, const int lda, float *b, + const float alpha, const float* a, const int lda, float* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_strsm_p == NULL) cblas_strsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const float alpha, const float *a, const int lda, - float *b, const int ldb))GET_FUNC(h_libcblas, "cblas_strsm"); + const int n, const float alpha, const float* a, const int lda, + float* b, const int ldb))GET_FUNC(h_libcblas, "cblas_strsm"); if (cblas_strsm_p != NULL) cblas_strsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -594,14 +594,14 @@ static void cblas_strsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_dtrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const double alpha, const double *a, const int lda, double *b, + const double alpha, const double* a, const int lda, double* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_dtrsm_p == NULL) cblas_dtrsm_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, - CBLAS_DIAG diag, const int m, const int n, const double alpha, const double *a, - const int lda, double *b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrsm"); + CBLAS_DIAG diag, const int m, const 
int n, const double alpha, const double* a, + const int lda, double* b, const int ldb))GET_FUNC(h_libcblas, "cblas_dtrsm"); if (cblas_dtrsm_p != NULL) cblas_dtrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -609,14 +609,14 @@ static void cblas_dtrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ctrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ctrsm_p == NULL) cblas_ctrsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrsm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int ldb))GET_FUNC(h_libcblas, "cblas_ctrsm"); if (cblas_ctrsm_p != NULL) cblas_ctrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -624,14 +624,14 @@ static void cblas_ctrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO static void cblas_ztrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, const int n, - const void *alpha, const void *a, const int lda, void *b, + const void* alpha, const void* a, const int lda, void* b, const int ldb) { if (cblas_library() != NULL) { if (cblas_ztrsm_p == NULL) cblas_ztrsm_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO uplo, CBLAS_TRANSPOSE transa, CBLAS_DIAG diag, const int m, - const int n, const void *alpha, const void *a, const int lda, - void *b, const int ldb))GET_FUNC(h_libcblas, "cblas_ztrsm"); + const int n, const void* alpha, const void* a, const int lda, + void* b, const int 
ldb))GET_FUNC(h_libcblas, "cblas_ztrsm"); if (cblas_ztrsm_p != NULL) cblas_ztrsm_p(layout, side, uplo, transa, diag, m, n, alpha, a, lda, b, ldb); } @@ -640,213 +640,213 @@ static void cblas_ztrsm_wrapper(CBLAS_LAYOUT layout, CBLAS_SIDE side, CBLAS_UPLO /* Level 2 */ static void (*cblas_sgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy); static void (*cblas_cgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zgemv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_sgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + int kl, int ku, const float alpha, const float* a, const 
int lda, + const float* x, const int incx, const float beta, float* y, const int incy); static void (*cblas_dgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + int kl, int ku, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy); static void (*cblas_cgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy); static void (*cblas_zgbmv_p)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy); static void (*cblas_sger_p)(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, - float *a, const int lda); + const float* x, const int incx, const float* y, const int incy, + float* a, const int lda); static void (*cblas_dger_p)(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, const int incy, - double *a, const int lda); -static void (*cblas_cgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda); +static void (*cblas_cgerc_p)(CBLAS_LAYOUT 
layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_zgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_zgerc_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_cgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_cgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); -static void (*cblas_zgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, +static void (*cblas_zgeru_p)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda); static void (*cblas_chbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zhbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_chemv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int 
n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_zhemv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy); + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy); static void (*cblas_cher_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda); static void (*cblas_zher_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda); static void (*cblas_cher2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda); static void (*cblas_zher2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda); static void (*cblas_chpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy); + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy); static void 
(*cblas_zhpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy); + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy); static void (*cblas_chpr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a); + const float alpha, const void* x, const int incx, void* a); static void (*cblas_zhpr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a); + const double alpha, const void* x, const int incx, void* a); static void (*cblas_chpr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a); static void (*cblas_zhpr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a); static void (*cblas_ssbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dsbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const 
int incy); static void (*cblas_ssymv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy); + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy); static void (*cblas_dsymv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy); + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy); static void (*cblas_ssyr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda); static void (*cblas_dsyr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda); static void (*cblas_ssyr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a, const int lda); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a, const int lda); static void (*cblas_dsyr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a, const int lda); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* a, const int lda); static void (*cblas_sspmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, const int incx, - const float beta, float 
*y, const int incy); + const float alpha, const float* a, const float* x, const int incx, + const float beta, float* y, const int incy); static void (*cblas_dspmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, const int incx, - const double beta, double *y, const int incy); + const double alpha, const double* a, const double* x, const int incx, + const double beta, double* y, const int incy); static void (*cblas_sspr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a); + const float alpha, const float* x, const int incx, float* a); static void (*cblas_dspr_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a); + const double alpha, const double* x, const int incx, double* a); static void (*cblas_sspr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a); static void (*cblas_dspr2_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* a); static void (*cblas_stbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx); static void (*cblas_dtbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx); 
+ CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx); static void (*cblas_ctbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_ztbmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_stbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx); static void (*cblas_dtbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx); static void (*cblas_ctbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx); static void (*cblas_ztbsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int 
incx); static void (*cblas_stpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx); static void (*cblas_dtpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx); static void (*cblas_ctpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_ztpmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_stpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx); static void (*cblas_dtpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx); static void (*cblas_ctpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_ztpsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx); static void (*cblas_strmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, 
CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx); static void (*cblas_dtrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx); static void (*cblas_ctrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void (*cblas_ztrmv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void (*cblas_strsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx); static void (*cblas_dtrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx); static void (*cblas_ctrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void 
(*cblas_ztrsv_p)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx); + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx); static void cblas_sgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int n, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sgemv_p == NULL) cblas_sgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int n, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sgemv"); if (cblas_sgemv_p != NULL) cblas_sgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -854,29 +854,29 @@ static void cblas_sgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_dgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const int n, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dgemv_p == NULL) cblas_dgemv_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - const double alpha, const double *a, const int lda, const double *x, const int incx, - const double beta, double *y, const int incy))GET_FUNC(h_libcblas, "cblas_dgemv"); + const 
double alpha, const double* a, const int lda, const double* x, const int incx, + const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dgemv"); if (cblas_dgemv_p != NULL) cblas_dgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); } } static void cblas_cgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cgemv_p == NULL) cblas_cgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cgemv"); if (cblas_cgemv_p != NULL) cblas_cgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -884,14 +884,14 @@ static void cblas_cgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_zgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zgemv_p == NULL) cblas_zgemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int n, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const 
void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zgemv"); if (cblas_zgemv_p != NULL) cblas_zgemv_p(layout, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); @@ -899,15 +899,15 @@ static void cblas_zgemv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_sgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const float alpha, const float *a, - const int lda, const float *x, const int incx, const float beta, - float *y, const int incy) { + const int n, int kl, int ku, const float alpha, const float* a, + const int lda, const float* x, const int incx, const float beta, + float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sgbmv_p == NULL) cblas_sgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + int kl, int ku, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sgbmv"); if (cblas_sgbmv_p != NULL) cblas_sgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -915,15 +915,15 @@ static void cblas_sgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_dgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const double alpha, const double *a, - const int lda, const double *x, const int incx, const double beta, - double *y, const int incy) { + const int n, int kl, int ku, const double alpha, const double* a, + const int lda, const double* x, const int incx, const double beta, + double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dgbmv_p == NULL) cblas_dgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const double alpha, 
const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + int kl, int ku, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dgbmv"); if (cblas_dgbmv_p != NULL) cblas_dgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -931,15 +931,15 @@ static void cblas_dgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_cgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const void *alpha, const void *a, - const int lda, const void *x, const int incx, const void *beta, - void *y, const int incy) { + const int n, int kl, int ku, const void* alpha, const void* a, + const int lda, const void* x, const int incx, const void* beta, + void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cgbmv_p == NULL) cblas_cgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cgbmv"); if (cblas_cgbmv_p != NULL) cblas_cgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -947,15 +947,15 @@ static void cblas_cgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_zgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, - const int n, int kl, int ku, const void *alpha, const void *a, - const int lda, const void *x, const int incx, const void *beta, - void *y, const int incy) { + const int n, int kl, int ku, const void* alpha, const void* a, + const int lda, const void* x, const int incx, const void* beta, + void* y, const int incy) { if 
(cblas_library() != NULL) { if (cblas_zgbmv_p == NULL) cblas_zgbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, const int m, const int n, - int kl, int ku, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + int kl, int ku, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zgbmv"); if (cblas_zgbmv_p != NULL) cblas_zgbmv_p(layout, trans, m, n, kl, ku, alpha, a, lda, x, incx, beta, y, incy); @@ -963,13 +963,13 @@ static void cblas_zgbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE trans, cons } static void cblas_sger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, - float *a, const int lda) { + const float* x, const int incx, const float* y, const int incy, + float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_sger_p == NULL) cblas_sger_p = (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const float alpha, - const float *x, const int incx, const float *y, const int incy, float *a, + const float* x, const int incx, const float* y, const int incy, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_sger"); if (cblas_sger_p != NULL) cblas_sger_p(layout, m, n, alpha, x, incx, y, incy, a, lda); @@ -977,69 +977,69 @@ static void cblas_sger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, co } static void cblas_dger_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, const int incy, - double *a, const int lda) { + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dger_p == NULL) cblas_dger_p = (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const double alpha, - const double *x, const int incx, const double *y, 
const int incy, - double *a, const int lda))GET_FUNC(h_libcblas, "cblas_dger"); + const double* x, const int incx, const double* y, const int incy, + double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dger"); if (cblas_dger_p != NULL) cblas_dger_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_cgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_cgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cgerc_p == NULL) cblas_cgerc_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cgerc"); if (cblas_cgerc_p != NULL) cblas_cgerc_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_zgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_zgerc_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zgerc_p == NULL) cblas_zgerc_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, 
"cblas_zgerc"); if (cblas_zgerc_p != NULL) cblas_zgerc_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_cgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_cgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cgeru_p == NULL) cblas_cgeru_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cgeru"); if (cblas_cgeru_p != NULL) cblas_cgeru_p(layout, m, n, alpha, x, incx, y, incy, a, lda); } } -static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, - void *a, const int lda) { +static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, + void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zgeru_p == NULL) cblas_zgeru_p = - (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void *alpha, - const void *x, const int incx, const void *y, const int incy, void *a, + (void (*)(CBLAS_LAYOUT layout, const int m, const int n, const void* alpha, + const void* x, const int incx, const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zgeru"); if (cblas_zgeru_p != NULL) cblas_zgeru_p(layout, m, n, alpha, x, incx, y, incy, a, lda); @@ -1047,14 +1047,14 @@ static void cblas_zgeru_wrapper(CBLAS_LAYOUT layout, const int 
m, const int n, c } static void cblas_chbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chbmv_p == NULL) cblas_chbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_chbmv"); if (cblas_chbmv_p != NULL) cblas_chbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1062,14 +1062,14 @@ static void cblas_chbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhbmv_p == NULL) cblas_zhbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const int k, const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhbmv"); if (cblas_zhbmv_p != NULL) cblas_zhbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1077,13 +1077,13 @@ static void 
cblas_zhbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chemv_p == NULL) cblas_chemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_chemv"); if (cblas_chemv_p != NULL) cblas_chemv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1091,13 +1091,13 @@ static void cblas_chemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhemv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, const void *x, - const int incx, const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const int lda, const void* x, + const int incx, const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhemv_p == NULL) cblas_zhemv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const int lda, - const void *x, const int incx, const void *beta, void *y, + const void* alpha, const void* a, const int lda, + const void* x, const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhemv"); if (cblas_zhemv_p != NULL) cblas_zhemv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1105,12 +1105,12 @@ static void cblas_zhemv_wrapper(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, con } static void cblas_cher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cher_p == NULL) cblas_cher_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a, + const float alpha, const void* x, const int incx, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cher"); if (cblas_cher_p != NULL) cblas_cher_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1118,12 +1118,12 @@ static void cblas_cher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_zher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zher_p == NULL) cblas_zher_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a, + const double alpha, const void* x, const int incx, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zher"); if (cblas_zher_p != NULL) cblas_zher_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1131,13 +1131,13 @@ static void cblas_zher_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_cher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_cher2_p == NULL) cblas_cher2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, 
const int incx, - const void *y, const int incy, void *a, + const void* alpha, const void* x, const int incx, + const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_cher2"); if (cblas_cher2_p != NULL) cblas_cher2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1145,13 +1145,13 @@ static void cblas_cher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a, const int lda) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a, const int lda) { if (cblas_library() != NULL) { if (cblas_zher2_p == NULL) cblas_zher2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, - const void *y, const int incy, void *a, + const void* alpha, const void* x, const int incx, + const void* y, const int incy, void* a, const int lda))GET_FUNC(h_libcblas, "cblas_zher2"); if (cblas_zher2_p != NULL) cblas_zher2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1159,13 +1159,13 @@ static void cblas_zher2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_chpmv_p == NULL) cblas_chpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, - const int incx, const void *beta, void *y, + const void* alpha, const void* a, const void* x, + const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, 
"cblas_chpmv"); if (cblas_chpmv_p != NULL) cblas_chpmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1173,13 +1173,13 @@ static void cblas_chpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_zhpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, const int incx, - const void *beta, void *y, const int incy) { + const void* alpha, const void* a, const void* x, const int incx, + const void* beta, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zhpmv_p == NULL) cblas_zhpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *a, const void *x, - const int incx, const void *beta, void *y, + const void* alpha, const void* a, const void* x, + const int incx, const void* beta, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zhpmv"); if (cblas_zhpmv_p != NULL) cblas_zhpmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1187,66 +1187,66 @@ static void cblas_zhpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_chpr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, void *a) { + const float alpha, const void* x, const int incx, void* a) { if (cblas_library() != NULL) { if (cblas_chpr_p == NULL) cblas_chpr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const void *x, const int incx, - void *a))GET_FUNC(h_libcblas, "cblas_chpr"); + const float alpha, const void* x, const int incx, + void* a))GET_FUNC(h_libcblas, "cblas_chpr"); if (cblas_chpr_p != NULL) cblas_chpr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_zhpr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, void *a) { + const double alpha, const void* x, const int incx, void* a) { if 
(cblas_library() != NULL) { if (cblas_zhpr_p == NULL) cblas_zhpr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const void *x, const int incx, - void *a))GET_FUNC(h_libcblas, "cblas_zhpr"); + const double alpha, const void* x, const int incx, + void* a))GET_FUNC(h_libcblas, "cblas_zhpr"); if (cblas_zhpr_p != NULL) cblas_zhpr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_chpr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a) { if (cblas_library() != NULL) { if (cblas_chpr2_p == NULL) cblas_chpr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a))GET_FUNC(h_libcblas, "cblas_chpr2"); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a))GET_FUNC(h_libcblas, "cblas_chpr2"); if (cblas_chpr2_p != NULL) cblas_chpr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_zhpr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a) { + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a) { if (cblas_library() != NULL) { if (cblas_zhpr2_p == NULL) cblas_zhpr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const void *alpha, const void *x, const int incx, const void *y, - const int incy, void *a))GET_FUNC(h_libcblas, "cblas_zhpr2"); + const void* alpha, const void* x, const int incx, const void* y, + const int incy, void* a))GET_FUNC(h_libcblas, "cblas_zhpr2"); if (cblas_zhpr2_p != NULL) cblas_zhpr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static 
void cblas_ssbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int k, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ssbmv_p == NULL) cblas_ssbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const int k, const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_ssbmv"); if (cblas_ssbmv_p != NULL) cblas_ssbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); @@ -1254,28 +1254,28 @@ static void cblas_ssbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const int k, const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const int k, const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dsbmv_p == NULL) cblas_dsbmv_p = (void (*)( CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, const int k, - const double alpha, const double *a, const int lda, const double *x, const int incx, - const double beta, double *y, const int incy))GET_FUNC(h_libcblas, "cblas_dsbmv"); + const double alpha, const double* a, const int lda, const double* x, const int incx, + const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsbmv"); if (cblas_dsbmv_p != NULL) cblas_dsbmv_p(layout, upper_lower, n, k, alpha, a, lda, x, incx, beta, y, incy); 
} } static void cblas_ssymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, const float *x, - const int incx, const float beta, float *y, const int incy) { + const float alpha, const float* a, const int lda, const float* x, + const int incx, const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ssymv_p == NULL) cblas_ssymv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const int lda, - const float *x, const int incx, const float beta, float *y, + const float alpha, const float* a, const int lda, + const float* x, const int incx, const float beta, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_ssymv"); if (cblas_ssymv_p != NULL) cblas_ssymv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1283,13 +1283,13 @@ static void cblas_ssymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, const double *x, - const int incx, const double beta, double *y, const int incy) { + const double alpha, const double* a, const int lda, const double* x, + const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dsymv_p == NULL) cblas_dsymv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const int lda, - const double *x, const int incx, const double beta, double *y, + const double alpha, const double* a, const int lda, + const double* x, const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsymv"); if (cblas_dsymv_p != NULL) cblas_dsymv_p(layout, upper_lower, n, alpha, a, lda, x, incx, beta, y, incy); @@ -1297,12 +1297,12 @@ static void cblas_dsymv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con 
} static void cblas_ssyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_ssyr_p == NULL) cblas_ssyr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a, + const float alpha, const float* x, const int incx, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_ssyr"); if (cblas_ssyr_p != NULL) cblas_ssyr_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1310,12 +1310,12 @@ static void cblas_ssyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_dsyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dsyr_p == NULL) cblas_dsyr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, double *a, + const double alpha, const double* x, const int incx, double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dsyr"); if (cblas_dsyr_p != NULL) cblas_dsyr_p(layout, upper_lower, n, alpha, x, incx, a, lda); @@ -1323,13 +1323,13 @@ static void cblas_dsyr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, cons } static void cblas_ssyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a, const int lda) { + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a, const int lda) { if (cblas_library() != NULL) { if (cblas_ssyr2_p == NULL) cblas_ssyr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, 
const int incx, - const float *y, const int incy, float *a, + const float alpha, const float* x, const int incx, + const float* y, const int incy, float* a, const int lda))GET_FUNC(h_libcblas, "cblas_ssyr2"); if (cblas_ssyr2_p != NULL) cblas_ssyr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1337,13 +1337,13 @@ static void cblas_ssyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dsyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a, const int lda) { + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a, const int lda) { if (cblas_library() != NULL) { if (cblas_dsyr2_p == NULL) cblas_dsyr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a, + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a, const int lda))GET_FUNC(h_libcblas, "cblas_dsyr2"); if (cblas_dsyr2_p != NULL) cblas_dsyr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a, lda); @@ -1351,13 +1351,13 @@ static void cblas_dsyr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_sspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, const int incx, - const float beta, float *y, const int incy) { + const float alpha, const float* a, const float* x, const int incx, + const float beta, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sspmv_p == NULL) cblas_sspmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *a, const float *x, - const int incx, const float beta, float *y, + const float alpha, const float* a, const float* x, + const int incx, const float beta, 
float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sspmv"); if (cblas_sspmv_p != NULL) cblas_sspmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1365,13 +1365,13 @@ static void cblas_sspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_dspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, - const int incx, const double beta, double *y, const int incy) { + const double alpha, const double* a, const double* x, + const int incx, const double beta, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dspmv_p == NULL) cblas_dspmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *a, const double *x, - const int incx, const double beta, double *y, + const double alpha, const double* a, const double* x, + const int incx, const double beta, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dspmv"); if (cblas_dspmv_p != NULL) cblas_dspmv_p(layout, upper_lower, n, alpha, a, x, incx, beta, y, incy); @@ -1379,65 +1379,65 @@ static void cblas_dspmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, con } static void cblas_sspr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, float *a) { + const float alpha, const float* x, const int incx, float* a) { if (cblas_library() != NULL) { if (cblas_sspr_p == NULL) cblas_sspr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, - float *a))GET_FUNC(h_libcblas, "cblas_sspr"); + const float alpha, const float* x, const int incx, + float* a))GET_FUNC(h_libcblas, "cblas_sspr"); if (cblas_sspr_p != NULL) cblas_sspr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_dspr_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int 
incx, double *a) { + const double alpha, const double* x, const int incx, double* a) { if (cblas_library() != NULL) { if (cblas_dspr_p == NULL) cblas_dspr_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - double *a))GET_FUNC(h_libcblas, "cblas_dspr"); + const double alpha, const double* x, const int incx, + double* a))GET_FUNC(h_libcblas, "cblas_dspr"); if (cblas_dspr_p != NULL) cblas_dspr_p(layout, upper_lower, n, alpha, x, incx, a); } } static void cblas_sspr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a) { + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a) { if (cblas_library() != NULL) { if (cblas_sspr2_p == NULL) cblas_sspr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const float alpha, const float *x, const int incx, const float *y, - const int incy, float *a))GET_FUNC(h_libcblas, "cblas_sspr2"); + const float alpha, const float* x, const int incx, const float* y, + const int incy, float* a))GET_FUNC(h_libcblas, "cblas_sspr2"); if (cblas_sspr2_p != NULL) cblas_sspr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_dspr2_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, - const double *y, const int incy, double *a) { + const double alpha, const double* x, const int incx, + const double* y, const int incy, double* a) { if (cblas_library() != NULL) { if (cblas_dspr2_p == NULL) cblas_dspr2_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, const int n, - const double alpha, const double *x, const int incx, const double *y, - const int incy, double *a))GET_FUNC(h_libcblas, "cblas_dspr2"); + const double alpha, const double* x, const int incx, const double* y, + const int incy, double* 
a))GET_FUNC(h_libcblas, "cblas_dspr2"); if (cblas_dspr2_p != NULL) cblas_dspr2_p(layout, upper_lower, n, alpha, x, incx, y, incy, a); } } static void cblas_stbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stbmv_p == NULL) cblas_stbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const float *a, const int lda, float *x, + const int k, const float* a, const int lda, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stbmv"); if (cblas_stbmv_p != NULL) cblas_stbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1445,13 +1445,13 @@ static void cblas_stbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtbmv_p == NULL) cblas_dtbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const double *a, const int lda, double *x, + const int k, const double* a, const int lda, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtbmv"); if (cblas_dtbmv_p != NULL) cblas_dtbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1459,13 +1459,13 @@ static void cblas_dtbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, 
CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctbmv_p == NULL) cblas_ctbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctbmv"); if (cblas_ctbmv_p != NULL) cblas_ctbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1473,13 +1473,13 @@ static void cblas_ctbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztbmv_p == NULL) cblas_ztbmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztbmv"); if (cblas_ztbmv_p != NULL) cblas_ztbmv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1487,13 +1487,13 @@ static void cblas_ztbmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const float *a, - const int lda, float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const float* a, + const int lda, float* x, const int incx) { 
if (cblas_library() != NULL) { if (cblas_stbsv_p == NULL) cblas_stbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const float *a, const int lda, float *x, + const int k, const float* a, const int lda, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stbsv"); if (cblas_stbsv_p != NULL) cblas_stbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1501,13 +1501,13 @@ static void cblas_stbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const double *a, - const int lda, double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const double* a, + const int lda, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtbsv_p == NULL) cblas_dtbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const double *a, const int lda, double *x, + const int k, const double* a, const int lda, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtbsv"); if (cblas_dtbsv_p != NULL) cblas_dtbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1515,13 +1515,13 @@ static void cblas_dtbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctbsv_p == NULL) cblas_ctbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const 
int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctbsv"); if (cblas_ctbsv_p != NULL) cblas_ctbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1529,13 +1529,13 @@ static void cblas_ctbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const int k, const void *a, - const int lda, void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const int k, const void* a, + const int lda, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztbsv_p == NULL) cblas_ztbsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, CBLAS_DIAG unit_diag, const int n, - const int k, const void *a, const int lda, void *x, + const int k, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztbsv"); if (cblas_ztbsv_p != NULL) cblas_ztbsv_p(layout, upper_lower, trans, unit_diag, n, k, a, lda, x, incx); @@ -1543,13 +1543,13 @@ static void cblas_ztbsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stpmv_p == NULL) cblas_stpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stpmv"); if (cblas_stpmv_p != NULL) cblas_stpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1557,13 +1557,13 @@ static void cblas_stpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static 
void cblas_dtpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtpmv_p == NULL) cblas_dtpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtpmv"); if (cblas_dtpmv_p != NULL) cblas_dtpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1571,13 +1571,13 @@ static void cblas_dtpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctpmv_p == NULL) cblas_ctpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctpmv"); if (cblas_ctpmv_p != NULL) cblas_ctpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1585,13 +1585,13 @@ static void cblas_ctpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztpmv_p == NULL) cblas_ztpmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void 
*x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztpmv"); if (cblas_ztpmv_p != NULL) cblas_ztpmv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1599,13 +1599,13 @@ static void cblas_ztpmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_stpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_stpsv_p == NULL) cblas_stpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, float *x, + CBLAS_DIAG unit_diag, const int n, const float* a, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_stpsv"); if (cblas_stpsv_p != NULL) cblas_stpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1613,13 +1613,13 @@ static void cblas_stpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_dtpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtpsv_p == NULL) cblas_dtpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, double *x, + CBLAS_DIAG unit_diag, const int n, const double* a, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtpsv"); if (cblas_dtpsv_p != NULL) cblas_dtpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1627,13 +1627,13 @@ static void cblas_dtpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ctpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG 
unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctpsv_p == NULL) cblas_ctpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctpsv"); if (cblas_ctpsv_p != NULL) cblas_ctpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1641,13 +1641,13 @@ static void cblas_ctpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztpsv_p == NULL) cblas_ztpsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztpsv"); if (cblas_ztpsv_p != NULL) cblas_ztpsv_p(layout, upper_lower, trans, unit_diag, n, a, x, incx); @@ -1655,41 +1655,41 @@ static void cblas_ztpsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_strmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_strmv_p == NULL) cblas_strmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx))GET_FUNC(h_libcblas, 
"cblas_strmv"); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx))GET_FUNC(h_libcblas, "cblas_strmv"); if (cblas_strmv_p != NULL) cblas_strmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_dtrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtrmv_p == NULL) cblas_dtrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrmv"); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrmv"); if (cblas_dtrmv_p != NULL) cblas_dtrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_ctrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctrmv_p == NULL) cblas_ctrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctrmv"); if (cblas_ctrmv_p != NULL) cblas_ctrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1697,13 +1697,13 @@ static void cblas_ctrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztrmv_wrapper(CBLAS_LAYOUT layout, 
CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztrmv_p == NULL) cblas_ztrmv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztrmv"); if (cblas_ztrmv_p != NULL) cblas_ztrmv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1711,41 +1711,41 @@ static void cblas_ztrmv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_strsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_strsv_p == NULL) cblas_strsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const float *a, const int lda, - float *x, const int incx))GET_FUNC(h_libcblas, "cblas_strsv"); + CBLAS_DIAG unit_diag, const int n, const float* a, const int lda, + float* x, const int incx))GET_FUNC(h_libcblas, "cblas_strsv"); if (cblas_strsv_p != NULL) cblas_strsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_dtrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dtrsv_p == NULL) cblas_dtrsv_p = 
(void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const double *a, const int lda, - double *x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrsv"); + CBLAS_DIAG unit_diag, const int n, const double* a, const int lda, + double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dtrsv"); if (cblas_dtrsv_p != NULL) cblas_dtrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); } } static void cblas_ctrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ctrsv_p == NULL) cblas_ctrsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ctrsv"); if (cblas_ctrsv_p != NULL) cblas_ctrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1753,13 +1753,13 @@ static void cblas_ctrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL } static void cblas_ztrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, - void *x, const int incx) { + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, + void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_ztrsv_p == NULL) cblas_ztrsv_p = (void (*)(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBLAS_TRANSPOSE trans, - CBLAS_DIAG unit_diag, const int n, const void *a, const int lda, void *x, + CBLAS_DIAG unit_diag, const int n, const void* a, const int lda, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_ztrsv"); if (cblas_ztrsv_p != NULL) 
cblas_ztrsv_p(layout, upper_lower, trans, unit_diag, n, a, lda, x, incx); @@ -1768,81 +1768,81 @@ static void cblas_ztrsv_wrapper(CBLAS_LAYOUT layout, CBLAS_UPLO upper_lower, CBL /* Level 1 */ -static float (*cblas_sasum_p)(const int n, const float *x, const int incx); -static double (*cblas_dasum_p)(const int n, const double *x, const int incx); -static float (*cblas_scasum_p)(const int n, const void *x, const int incx); -static double (*cblas_dzasum_p)(const int n, const void *x, const int incx); -static void (*cblas_saxpy_p)(const int n, const float alpha, const float *x, const int incx, - float *y, const int incy); -static void (*cblas_daxpy_p)(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy); -static void (*cblas_caxpy_p)(const int n, const void *alpha, const void *x, const int incx, void *y, +static float (*cblas_sasum_p)(const int n, const float* x, const int incx); +static double (*cblas_dasum_p)(const int n, const double* x, const int incx); +static float (*cblas_scasum_p)(const int n, const void* x, const int incx); +static double (*cblas_dzasum_p)(const int n, const void* x, const int incx); +static void (*cblas_saxpy_p)(const int n, const float alpha, const float* x, const int incx, + float* y, const int incy); +static void (*cblas_daxpy_p)(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy); +static void (*cblas_caxpy_p)(const int n, const void* alpha, const void* x, const int incx, void* y, const int incy); -static void (*cblas_zaxpy_p)(const int n, const void *alpha, const void *x, const int incx, void *y, +static void (*cblas_zaxpy_p)(const int n, const void* alpha, const void* x, const int incx, void* y, const int incy); -static void (*cblas_scopy_p)(const int n, const float *x, const int incx, float *y, const int incy); -static void (*cblas_dcopy_p)(const int n, const double *x, const int incx, double *y, +static void (*cblas_scopy_p)(const int n, const float* 
x, const int incx, float* y, const int incy); +static void (*cblas_dcopy_p)(const int n, const double* x, const int incx, double* y, const int incy); -static void (*cblas_ccopy_p)(const int n, const void *x, const int incx, void *y, const int incy); -static void (*cblas_zcopy_p)(const int n, const void *x, const int incx, void *y, const int incy); -static float (*cblas_sdot_p)(const int n, const float *x, const int incx, const float *y, +static void (*cblas_ccopy_p)(const int n, const void* x, const int incx, void* y, const int incy); +static void (*cblas_zcopy_p)(const int n, const void* x, const int incx, void* y, const int incy); +static float (*cblas_sdot_p)(const int n, const float* x, const int incx, const float* y, const int incy); -static double (*cblas_ddot_p)(const int n, const double *x, const int incx, const double *y, +static double (*cblas_ddot_p)(const int n, const double* x, const int incx, const double* y, const int incy); -static double (*cblas_dsdot_p)(const int n, const float *x, const int incx, const float *y, +static double (*cblas_dsdot_p)(const int n, const float* x, const int incx, const float* y, const int incy); -static float (*cblas_sdsdot_p)(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy); -static float (*cblas_snrm2_p)(const int n, const float *x, const int incx); -static double (*cblas_dnrm2_p)(const int n, const double *x, const int incx); -static float (*cblas_scnrm2_p)(const int n, const void *x, const int incx); -static double (*cblas_dznrm2_p)(const int n, const void *x, const int incx); -static void (*cblas_srot_p)(const int n, float *x, const int incx, float *y, const int incy, +static float (*cblas_sdsdot_p)(const int n, const float sb, const float* x, const int incx, + const float* y, const int incy); +static float (*cblas_snrm2_p)(const int n, const float* x, const int incx); +static double (*cblas_dnrm2_p)(const int n, const double* x, const int incx); +static float 
(*cblas_scnrm2_p)(const int n, const void* x, const int incx); +static double (*cblas_dznrm2_p)(const int n, const void* x, const int incx); +static void (*cblas_srot_p)(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s); -static void (*cblas_drot_p)(const int n, double *x, const int incx, double *y, const int incy, +static void (*cblas_drot_p)(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s); -static void (*csrot_p)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s); -static void (*zdrot_p)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s); -static void (*cblas_srotg_p)(float *a, float *b, float *c, float *s); -static void (*cblas_drotg_p)(double *a, double *b, double *c, double *s); -static void (*crotg_p)(void *a, void *b, float *c, void *s); -static void (*zrotg_p)(void *a, void *b, double *c, void *s); -static void (*cblas_srotm_p)(const int n, float *x, const int incx, float *y, const int incy, - const float *param); -static void (*cblas_drotm_p)(const int n, double *x, const int incx, double *y, const int incy, - const double *param); -static void (*cblas_srotmg_p)(float *d1, float *d2, float *x1, float y1, float *param); -static void (*cblas_drotmg_p)(double *d1, double *d2, double *x1, double y1, double *param); -static void (*cblas_sscal_p)(const int n, const float alpha, float *x, const int incx); -static void (*cblas_dscal_p)(const int n, const double alpha, double *x, const int incx); -static void (*cblas_cscal_p)(const int n, const void *alpha, void *x, const int incx); -static void (*cblas_zscal_p)(const int n, const void *alpha, void *x, const int incx); -static void (*cblas_csscal_p)(const int n, const float alpha, void *x, const int incx); -static void (*cblas_zdscal_p)(const int n, const double alpha, void *x, const int incx); -static void 
(*cblas_sswap_p)(const int n, float *x, const int incx, float *y, const int incy); -static void (*cblas_dswap_p)(const int n, double *x, const int incx, double *y, const int incy); -static void (*cblas_cswap_p)(const int n, void *x, const int incx, void *y, const int incy); -static void (*cblas_zswap_p)(const int n, void *x, const int incx, void *y, const int incy); -static void (*cblas_cdotc_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_zdotc_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_cdotu_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static void (*cblas_zdotu_sub_p)(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres); -static int (*cblas_isamax_p)(const int n, const float *x, const int incx); -static int (*cblas_idamax_p)(const int n, const double *x, const int incx); -static int (*cblas_icamax_p)(const int n, const void *x, const int incx); -static int (*cblas_izamax_p)(const int n, const void *x, const int incx); - -static float cblas_sasum_wrapper(const int n, const float *x, const int incx) { +static void (*csrot_p)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s); +static void (*zdrot_p)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s); +static void (*cblas_srotg_p)(float* a, float* b, float* c, float* s); +static void (*cblas_drotg_p)(double* a, double* b, double* c, double* s); +static void (*crotg_p)(void* a, void* b, float* c, void* s); +static void (*zrotg_p)(void* a, void* b, double* c, void* s); +static void (*cblas_srotm_p)(const int n, float* x, const int incx, float* y, const int incy, + const float* param); +static void (*cblas_drotm_p)(const int n, double* x, const int incx, double* y, const int incy, + const 
double* param); +static void (*cblas_srotmg_p)(float* d1, float* d2, float* x1, float y1, float* param); +static void (*cblas_drotmg_p)(double* d1, double* d2, double* x1, double y1, double* param); +static void (*cblas_sscal_p)(const int n, const float alpha, float* x, const int incx); +static void (*cblas_dscal_p)(const int n, const double alpha, double* x, const int incx); +static void (*cblas_cscal_p)(const int n, const void* alpha, void* x, const int incx); +static void (*cblas_zscal_p)(const int n, const void* alpha, void* x, const int incx); +static void (*cblas_csscal_p)(const int n, const float alpha, void* x, const int incx); +static void (*cblas_zdscal_p)(const int n, const double alpha, void* x, const int incx); +static void (*cblas_sswap_p)(const int n, float* x, const int incx, float* y, const int incy); +static void (*cblas_dswap_p)(const int n, double* x, const int incx, double* y, const int incy); +static void (*cblas_cswap_p)(const int n, void* x, const int incx, void* y, const int incy); +static void (*cblas_zswap_p)(const int n, void* x, const int incx, void* y, const int incy); +static void (*cblas_cdotc_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_zdotc_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_cdotu_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static void (*cblas_zdotu_sub_p)(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres); +static int (*cblas_isamax_p)(const int n, const float* x, const int incx); +static int (*cblas_idamax_p)(const int n, const double* x, const int incx); +static int (*cblas_icamax_p)(const int n, const void* x, const int incx); +static int (*cblas_izamax_p)(const int n, const void* x, const int incx); + +static float cblas_sasum_wrapper(const int n, const float* x, const int incx) { 
float sasum_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sasum_p == NULL) - cblas_sasum_p = (float (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_sasum_p = (float (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_sasum"); if (cblas_sasum_p != NULL) sasum_res = cblas_sasum_p(n, x, incx); @@ -1850,11 +1850,11 @@ static float cblas_sasum_wrapper(const int n, const float *x, const int incx) { return sasum_res; } -static double cblas_dasum_wrapper(const int n, const double *x, const int incx) { +static double cblas_dasum_wrapper(const int n, const double* x, const int incx) { double dasum_res = 0.0; if (cblas_library() != NULL) { if (cblas_dasum_p == NULL) - cblas_dasum_p = (double (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_dasum_p = (double (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_dasum"); if (cblas_dasum_p != NULL) dasum_res = cblas_dasum_p(n, x, incx); @@ -1862,11 +1862,11 @@ static double cblas_dasum_wrapper(const int n, const double *x, const int incx) return dasum_res; } -static float cblas_scasum_wrapper(const int n, const void *x, const int incx) { +static float cblas_scasum_wrapper(const int n, const void* x, const int incx) { float scasum_res = 0.0f; if (cblas_library() != NULL) { if (cblas_scasum_p == NULL) - cblas_scasum_p = (float (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_scasum_p = (float (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_scasum"); if (cblas_scasum_p != NULL) scasum_res = cblas_scasum_p(n, x, incx); @@ -1874,11 +1874,11 @@ static float cblas_scasum_wrapper(const int n, const void *x, const int incx) { return scasum_res; } -static double cblas_dzasum_wrapper(const int n, const void *x, const int incx) { +static double cblas_dzasum_wrapper(const int n, const void* x, const int incx) { double dzasum_res = 0.0; if (cblas_library() != NULL) { if (cblas_dzasum_p == NULL) - 
cblas_dzasum_p = (double (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_dzasum_p = (double (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_dzasum"); if (cblas_dzasum_p != NULL) dzasum_res = cblas_dzasum_p(n, x, incx); @@ -1886,102 +1886,102 @@ static double cblas_dzasum_wrapper(const int n, const void *x, const int incx) { return dzasum_res; } -static void cblas_saxpy_wrapper(const int n, const float alpha, const float *x, const int incx, - float *y, const int incy) { +static void cblas_saxpy_wrapper(const int n, const float alpha, const float* x, const int incx, + float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_saxpy_p == NULL) cblas_saxpy_p = - (void (*)(const int n, const float alpha, const float *x, const int incx, float *y, + (void (*)(const int n, const float alpha, const float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_saxpy"); if (cblas_saxpy_p != NULL) cblas_saxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_daxpy_wrapper(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy) { +static void cblas_daxpy_wrapper(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_daxpy_p == NULL) cblas_daxpy_p = - (void (*)(const int n, const double alpha, const double *x, const int incx, - double *y, const int incy))GET_FUNC(h_libcblas, "cblas_daxpy"); + (void (*)(const int n, const double alpha, const double* x, const int incx, + double* y, const int incy))GET_FUNC(h_libcblas, "cblas_daxpy"); if (cblas_daxpy_p != NULL) cblas_daxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_caxpy_wrapper(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy) { +static void cblas_caxpy_wrapper(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy) { if (cblas_library() != NULL) { 
if (cblas_caxpy_p == NULL) - cblas_caxpy_p = (void (*)(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy))GET_FUNC(h_libcblas, "cblas_caxpy"); + cblas_caxpy_p = (void (*)(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy))GET_FUNC(h_libcblas, "cblas_caxpy"); if (cblas_caxpy_p != NULL) cblas_caxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_zaxpy_wrapper(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy) { +static void cblas_zaxpy_wrapper(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zaxpy_p == NULL) - cblas_zaxpy_p = (void (*)(const int n, const void *alpha, const void *x, const int incx, - void *y, const int incy))GET_FUNC(h_libcblas, "cblas_zaxpy"); + cblas_zaxpy_p = (void (*)(const int n, const void* alpha, const void* x, const int incx, + void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zaxpy"); if (cblas_zaxpy_p != NULL) cblas_zaxpy_p(n, alpha, x, incx, y, incy); } } -static void cblas_scopy_wrapper(const int n, const float *x, const int incx, float *y, +static void cblas_scopy_wrapper(const int n, const float* x, const int incx, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_scopy_p == NULL) - cblas_scopy_p = (void (*)(const int n, const float *x, const int incx, float *y, + cblas_scopy_p = (void (*)(const int n, const float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_scopy"); if (cblas_scopy_p != NULL) cblas_scopy_p(n, x, incx, y, incy); } } -static void cblas_dcopy_wrapper(const int n, const double *x, const int incx, double *y, +static void cblas_dcopy_wrapper(const int n, const double* x, const int incx, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dcopy_p == NULL) - cblas_dcopy_p = (void (*)(const int n, const double *x, const int incx, double *y, + cblas_dcopy_p = 
(void (*)(const int n, const double* x, const int incx, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dcopy"); if (cblas_dcopy_p != NULL) cblas_dcopy_p(n, x, incx, y, incy); } } -static void cblas_ccopy_wrapper(const int n, const void *x, const int incx, void *y, +static void cblas_ccopy_wrapper(const int n, const void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_ccopy_p == NULL) - cblas_ccopy_p = (void (*)(const int n, const void *x, const int incx, void *y, + cblas_ccopy_p = (void (*)(const int n, const void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_ccopy"); if (cblas_ccopy_p != NULL) cblas_ccopy_p(n, x, incx, y, incy); } } -static void cblas_zcopy_wrapper(const int n, const void *x, const int incx, void *y, +static void cblas_zcopy_wrapper(const int n, const void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zcopy_p == NULL) - cblas_zcopy_p = (void (*)(const int n, const void *x, const int incx, void *y, + cblas_zcopy_p = (void (*)(const int n, const void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zcopy"); if (cblas_zcopy_p != NULL) cblas_zcopy_p(n, x, incx, y, incy); } } -static float cblas_sdot_wrapper(const int n, const float *x, const int incx, const float *y, +static float cblas_sdot_wrapper(const int n, const float* x, const int incx, const float* y, const int incy) { float sdot_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sdot_p == NULL) - cblas_sdot_p = (float (*)(const int n, const float *x, const int incx, const float *y, + cblas_sdot_p = (float (*)(const int n, const float* x, const int incx, const float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sdot"); if (cblas_sdot_p != NULL) sdot_res = cblas_sdot_p(n, x, incx, y, incy); @@ -1989,13 +1989,13 @@ static float cblas_sdot_wrapper(const int n, const float *x, const int incx, con return sdot_res; } -static double cblas_ddot_wrapper(const 
int n, const double *x, const int incx, const double *y, +static double cblas_ddot_wrapper(const int n, const double* x, const int incx, const double* y, const int incy) { double ddot_res = 0.0; if (cblas_library() != NULL) { if (cblas_ddot_p == NULL) cblas_ddot_p = - (double (*)(const int n, const double *x, const int incx, const double *y, + (double (*)(const int n, const double* x, const int incx, const double* y, const int incy))GET_FUNC(h_libcblas, "cblas_ddot"); if (cblas_ddot_p != NULL) ddot_res = cblas_ddot_p(n, x, incx, y, incy); @@ -2003,12 +2003,12 @@ static double cblas_ddot_wrapper(const int n, const double *x, const int incx, c return ddot_res; } -static double cblas_dsdot_wrapper(const int n, const float *x, const int incx, const float *y, +static double cblas_dsdot_wrapper(const int n, const float* x, const int incx, const float* y, const int incy) { double dsdot_res = 0.0; if (cblas_library() != NULL) { if (cblas_dsdot_p == NULL) - cblas_dsdot_p = (double (*)(const int n, const float *x, const int incx, const float *y, + cblas_dsdot_p = (double (*)(const int n, const float* x, const int incx, const float* y, const int incy))GET_FUNC(h_libcblas, "cblas_dsdot"); if (cblas_dsdot_p != NULL) dsdot_res = cblas_dsdot_p(n, x, incx, y, incy); @@ -2016,25 +2016,25 @@ static double cblas_dsdot_wrapper(const int n, const float *x, const int incx, c return dsdot_res; } -static float cblas_sdsdot_wrapper(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy) { +static float cblas_sdsdot_wrapper(const int n, const float sb, const float* x, const int incx, + const float* y, const int incy) { float sdsdot_res = 0.0f; if (cblas_library() != NULL) { if (cblas_sdsdot_p == NULL) cblas_sdsdot_p = - (float (*)(const int n, const float sb, const float *x, const int incx, - const float *y, const int incy))GET_FUNC(h_libcblas, "cblas_sdsdot"); + (float (*)(const int n, const float sb, const float* x, const int incx, + const float* y, 
const int incy))GET_FUNC(h_libcblas, "cblas_sdsdot"); if (cblas_sdsdot_p != NULL) sdsdot_res = cblas_sdsdot_p(n, sb, x, incx, y, incy); } return sdsdot_res; } -static float cblas_snrm2_wrapper(const int n, const float *x, const int incx) { +static float cblas_snrm2_wrapper(const int n, const float* x, const int incx) { float snrm2_res = 0.0f; if (cblas_library() != NULL) { if (cblas_snrm2_p == NULL) - cblas_snrm2_p = (float (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_snrm2_p = (float (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_snrm2"); if (cblas_snrm2_p != NULL) snrm2_res = cblas_snrm2_p(n, x, incx); @@ -2042,11 +2042,11 @@ static float cblas_snrm2_wrapper(const int n, const float *x, const int incx) { return snrm2_res; } -static double cblas_dnrm2_wrapper(const int n, const double *x, const int incx) { +static double cblas_dnrm2_wrapper(const int n, const double* x, const int incx) { double dnrm2_res = 0.0; if (cblas_library() != NULL) { if (cblas_dnrm2_p == NULL) - cblas_dnrm2_p = (double (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_dnrm2_p = (double (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_dnrm2"); if (cblas_dnrm2_p != NULL) dnrm2_res = cblas_dnrm2_p(n, x, incx); @@ -2054,11 +2054,11 @@ static double cblas_dnrm2_wrapper(const int n, const double *x, const int incx) return dnrm2_res; } -static float cblas_scnrm2_wrapper(const int n, const void *x, const int incx) { +static float cblas_scnrm2_wrapper(const int n, const void* x, const int incx) { float scnrm2_res = 0.0f; if (cblas_library() != NULL) { if (cblas_scnrm2_p == NULL) - cblas_scnrm2_p = (float (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_scnrm2_p = (float (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_scnrm2"); if (cblas_scnrm2_p != NULL) scnrm2_res = cblas_scnrm2_p(n, x, incx); @@ -2066,11 +2066,11 @@ static float 
cblas_scnrm2_wrapper(const int n, const void *x, const int incx) { return scnrm2_res; } -static double cblas_dznrm2_wrapper(const int n, const void *x, const int incx) { +static double cblas_dznrm2_wrapper(const int n, const void* x, const int incx) { double dznrm2_res = 0.0; if (cblas_library() != NULL) { if (cblas_dznrm2_p == NULL) - cblas_dznrm2_p = (double (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_dznrm2_p = (double (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_dznrm2"); if (cblas_dznrm2_p != NULL) dznrm2_res = cblas_dznrm2_p(n, x, incx); @@ -2078,297 +2078,297 @@ static double cblas_dznrm2_wrapper(const int n, const void *x, const int incx) { return dznrm2_res; } -static void cblas_srot_wrapper(const int n, float *x, const int incx, float *y, const int incy, +static void cblas_srot_wrapper(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s) { if (cblas_library() != NULL) { if (cblas_srot_p == NULL) cblas_srot_p = - (void (*)(const int n, float *x, const int incx, float *y, const int incy, + (void (*)(const int n, float* x, const int incx, float* y, const int incy, const float c, const float s))GET_FUNC(h_libcblas, "cblas_srot"); if (cblas_srot_p != NULL) cblas_srot_p(n, x, incx, y, incy, c, s); } } -static void cblas_drot_wrapper(const int n, double *x, const int incx, double *y, const int incy, +static void cblas_drot_wrapper(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s) { if (cblas_library() != NULL) { if (cblas_drot_p == NULL) cblas_drot_p = - (void (*)(const int n, double *x, const int incx, double *y, const int incy, + (void (*)(const int n, double* x, const int incx, double* y, const int incy, const double c, const double s))GET_FUNC(h_libcblas, "cblas_drot"); if (cblas_drot_p != NULL) cblas_drot_p(n, x, incx, y, incy, c, s); } } -static void csrot_wrapper(const int *n, void *x, const int *incx, void *y, 
const int *incy, - const float *c, const float *s) { +static void csrot_wrapper(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s) { if (blas_library() != NULL) { if (csrot_p == NULL) - csrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s))GET_FUNC(h_libblas, "csrot_"); + csrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s))GET_FUNC(h_libblas, "csrot_"); if (csrot_p == NULL) - csrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const float *c, const float *s))GET_FUNC(h_libblas, "CSROT"); + csrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const float* c, const float* s))GET_FUNC(h_libblas, "CSROT"); if (csrot_p != NULL) csrot_p(n, x, incx, y, incy, c, s); } } -static void zdrot_wrapper(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s) { +static void zdrot_wrapper(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s) { if (blas_library() != NULL) { if (zdrot_p == NULL) - zdrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s))GET_FUNC(h_libblas, "zdrot_"); + zdrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s))GET_FUNC(h_libblas, "zdrot_"); if (zdrot_p == NULL) - zdrot_p = (void (*)(const int *n, void *x, const int *incx, void *y, const int *incy, - const double *c, const double *s))GET_FUNC(h_libblas, "ZDROT"); + zdrot_p = (void (*)(const int* n, void* x, const int* incx, void* y, const int* incy, + const double* c, const double* s))GET_FUNC(h_libblas, "ZDROT"); if (zdrot_p != NULL) zdrot_p(n, x, incx, y, incy, c, s); } } -static void cblas_srotg_wrapper(float *a, float *b, float 
*c, float *s) { +static void cblas_srotg_wrapper(float* a, float* b, float* c, float* s) { if (cblas_library() != NULL) { if (cblas_srotg_p == NULL) - cblas_srotg_p = (void (*)(float *a, float *b, float *c, float *s))GET_FUNC( + cblas_srotg_p = (void (*)(float* a, float* b, float* c, float* s))GET_FUNC( h_libcblas, "cblas_srotg"); if (cblas_srotg_p != NULL) cblas_srotg_p(a, b, c, s); } } -static void cblas_drotg_wrapper(double *a, double *b, double *c, double *s) { +static void cblas_drotg_wrapper(double* a, double* b, double* c, double* s) { if (cblas_library() != NULL) { if (cblas_drotg_p == NULL) - cblas_drotg_p = (void (*)(double *a, double *b, double *c, double *s))GET_FUNC( + cblas_drotg_p = (void (*)(double* a, double* b, double* c, double* s))GET_FUNC( h_libcblas, "cblas_drotg"); if (cblas_drotg_p != NULL) cblas_drotg_p(a, b, c, s); } } -static void crotg_wrapper(void *a, void *b, float *c, void *s) { +static void crotg_wrapper(void* a, void* b, float* c, void* s) { if (blas_library() != NULL) { if (crotg_p == NULL) - crotg_p = (void (*)(void *a, void *b, float *c, void *s))GET_FUNC(h_libblas, "crotg_"); + crotg_p = (void (*)(void* a, void* b, float* c, void* s))GET_FUNC(h_libblas, "crotg_"); if (crotg_p == NULL) - crotg_p = (void (*)(void *a, void *b, float *c, void *s))GET_FUNC(h_libblas, "CROTG"); + crotg_p = (void (*)(void* a, void* b, float* c, void* s))GET_FUNC(h_libblas, "CROTG"); if (crotg_p != NULL) crotg_p(a, b, c, s); } } -static void zrotg_wrapper(void *a, void *b, double *c, void *s) { +static void zrotg_wrapper(void* a, void* b, double* c, void* s) { if (blas_library() != NULL) { if (zrotg_p == NULL) - zrotg_p = (void (*)(void *a, void *b, double *c, void *s))GET_FUNC(h_libblas, "zrotg_"); + zrotg_p = (void (*)(void* a, void* b, double* c, void* s))GET_FUNC(h_libblas, "zrotg_"); if (zrotg_p == NULL) - zrotg_p = (void (*)(void *a, void *b, double *c, void *s))GET_FUNC(h_libblas, "ZROTG"); + zrotg_p = (void (*)(void* a, void* b, double* c, void* 
s))GET_FUNC(h_libblas, "ZROTG"); if (zrotg_p != NULL) zrotg_p(a, b, c, s); } } -static void cblas_srotm_wrapper(const int n, float *x, const int incx, float *y, const int incy, - const float *param) { +static void cblas_srotm_wrapper(const int n, float* x, const int incx, float* y, const int incy, + const float* param) { if (cblas_library() != NULL) { if (cblas_srotm_p == NULL) cblas_srotm_p = - (void (*)(const int n, float *x, const int incx, float *y, const int incy, - const float *param))GET_FUNC(h_libcblas, "cblas_srotm"); + (void (*)(const int n, float* x, const int incx, float* y, const int incy, + const float* param))GET_FUNC(h_libcblas, "cblas_srotm"); if (cblas_srotm_p != NULL) cblas_srotm_p(n, x, incx, y, incy, param); } } -static void cblas_drotm_wrapper(const int n, double *x, const int incx, double *y, const int incy, - const double *param) { +static void cblas_drotm_wrapper(const int n, double* x, const int incx, double* y, const int incy, + const double* param) { if (cblas_library() != NULL) { if (cblas_drotm_p == NULL) cblas_drotm_p = - (void (*)(const int n, double *x, const int incx, double *y, const int incy, - const double *param))GET_FUNC(h_libcblas, "cblas_drotm"); + (void (*)(const int n, double* x, const int incx, double* y, const int incy, + const double* param))GET_FUNC(h_libcblas, "cblas_drotm"); if (cblas_drotm_p != NULL) cblas_drotm_p(n, x, incx, y, incy, param); } } -static void cblas_srotmg_wrapper(float *d1, float *d2, float *x1, float y1, float *param) { +static void cblas_srotmg_wrapper(float* d1, float* d2, float* x1, float y1, float* param) { if (cblas_library() != NULL) { if (cblas_srotmg_p == NULL) - cblas_srotmg_p = (void (*)(float *d1, float *d2, float *x1, float y1, - float *param))GET_FUNC(h_libcblas, "cblas_srotmg"); + cblas_srotmg_p = (void (*)(float* d1, float* d2, float* x1, float y1, + float* param))GET_FUNC(h_libcblas, "cblas_srotmg"); if (cblas_srotmg_p != NULL) cblas_srotmg_p(d1, d2, x1, y1, param); } } -static void 
cblas_drotmg_wrapper(double *d1, double *d2, double *x1, double y1, double *param) { +static void cblas_drotmg_wrapper(double* d1, double* d2, double* x1, double y1, double* param) { if (cblas_library() != NULL) { if (cblas_drotmg_p == NULL) - cblas_drotmg_p = (void (*)(double *d1, double *d2, double *x1, double y1, - double *param))GET_FUNC(h_libcblas, "cblas_drotmg"); + cblas_drotmg_p = (void (*)(double* d1, double* d2, double* x1, double y1, + double* param))GET_FUNC(h_libcblas, "cblas_drotmg"); if (cblas_drotmg_p != NULL) cblas_drotmg_p(d1, d2, x1, y1, param); } } -static void cblas_sscal_wrapper(const int n, const float alpha, float *x, const int incx) { +static void cblas_sscal_wrapper(const int n, const float alpha, float* x, const int incx) { if (cblas_library() != NULL) { if (cblas_sscal_p == NULL) - cblas_sscal_p = (void (*)(const int n, const float alpha, float *x, + cblas_sscal_p = (void (*)(const int n, const float alpha, float* x, const int incx))GET_FUNC(h_libcblas, "cblas_sscal"); if (cblas_sscal_p != NULL) cblas_sscal_p(n, alpha, x, incx); } } -static void cblas_dscal_wrapper(const int n, const double alpha, double *x, const int incx) { +static void cblas_dscal_wrapper(const int n, const double alpha, double* x, const int incx) { if (cblas_library() != NULL) { if (cblas_dscal_p == NULL) - cblas_dscal_p = (void (*)(const int n, const double alpha, double *x, + cblas_dscal_p = (void (*)(const int n, const double alpha, double* x, const int incx))GET_FUNC(h_libcblas, "cblas_dscal"); if (cblas_dscal_p != NULL) cblas_dscal_p(n, alpha, x, incx); } } -static void cblas_cscal_wrapper(const int n, const void *alpha, void *x, const int incx) { +static void cblas_cscal_wrapper(const int n, const void* alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_cscal_p == NULL) - cblas_cscal_p = (void (*)(const int n, const void *alpha, void *x, + cblas_cscal_p = (void (*)(const int n, const void* alpha, void* x, const int 
incx))GET_FUNC(h_libcblas, "cblas_cscal"); if (cblas_cscal_p != NULL) cblas_cscal_p(n, alpha, x, incx); } } -static void cblas_zscal_wrapper(const int n, const void *alpha, void *x, const int incx) { +static void cblas_zscal_wrapper(const int n, const void* alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_zscal_p == NULL) - cblas_zscal_p = (void (*)(const int n, const void *alpha, void *x, + cblas_zscal_p = (void (*)(const int n, const void* alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_zscal"); if (cblas_zscal_p != NULL) cblas_zscal_p(n, alpha, x, incx); } } -static void cblas_csscal_wrapper(const int n, const float alpha, void *x, const int incx) { +static void cblas_csscal_wrapper(const int n, const float alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_csscal_p == NULL) - cblas_csscal_p = (void (*)(const int n, const float alpha, void *x, + cblas_csscal_p = (void (*)(const int n, const float alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_csscal"); if (cblas_csscal_p != NULL) cblas_csscal_p(n, alpha, x, incx); } } -static void cblas_zdscal_wrapper(const int n, const double alpha, void *x, const int incx) { +static void cblas_zdscal_wrapper(const int n, const double alpha, void* x, const int incx) { if (cblas_library() != NULL) { if (cblas_zdscal_p == NULL) - cblas_zdscal_p = (void (*)(const int n, const double alpha, void *x, + cblas_zdscal_p = (void (*)(const int n, const double alpha, void* x, const int incx))GET_FUNC(h_libcblas, "cblas_zdscal"); if (cblas_zdscal_p != NULL) cblas_zdscal_p(n, alpha, x, incx); } } -static void cblas_sswap_wrapper(const int n, float *x, const int incx, float *y, const int incy) { +static void cblas_sswap_wrapper(const int n, float* x, const int incx, float* y, const int incy) { if (cblas_library() != NULL) { if (cblas_sswap_p == NULL) - cblas_sswap_p = (void (*)(const int n, float *x, const int incx, float *y, + cblas_sswap_p = (void (*)(const int n, 
float* x, const int incx, float* y, const int incy))GET_FUNC(h_libcblas, "cblas_sswap"); if (cblas_sswap_p != NULL) cblas_sswap_p(n, x, incx, y, incy); } } -static void cblas_dswap_wrapper(const int n, double *x, const int incx, double *y, const int incy) { +static void cblas_dswap_wrapper(const int n, double* x, const int incx, double* y, const int incy) { if (cblas_library() != NULL) { if (cblas_dswap_p == NULL) - cblas_dswap_p = (void (*)(const int n, double *x, const int incx, double *y, + cblas_dswap_p = (void (*)(const int n, double* x, const int incx, double* y, const int incy))GET_FUNC(h_libcblas, "cblas_dswap"); if (cblas_dswap_p != NULL) cblas_dswap_p(n, x, incx, y, incy); } } -static void cblas_cswap_wrapper(const int n, void *x, const int incx, void *y, const int incy) { +static void cblas_cswap_wrapper(const int n, void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_cswap_p == NULL) - cblas_cswap_p = (void (*)(const int n, void *x, const int incx, void *y, + cblas_cswap_p = (void (*)(const int n, void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_cswap"); if (cblas_cswap_p != NULL) cblas_cswap_p(n, x, incx, y, incy); } } -static void cblas_zswap_wrapper(const int n, void *x, const int incx, void *y, const int incy) { +static void cblas_zswap_wrapper(const int n, void* x, const int incx, void* y, const int incy) { if (cblas_library() != NULL) { if (cblas_zswap_p == NULL) - cblas_zswap_p = (void (*)(const int n, void *x, const int incx, void *y, + cblas_zswap_p = (void (*)(const int n, void* x, const int incx, void* y, const int incy))GET_FUNC(h_libcblas, "cblas_zswap"); if (cblas_zswap_p != NULL) cblas_zswap_p(n, x, incx, y, incy); } } -static void cblas_cdotc_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_cdotc_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { 
if (cblas_library() != NULL) { if (cblas_cdotc_sub_p == NULL) cblas_cdotc_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_cdotc_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_cdotc_sub"); if (cblas_cdotc_sub_p != NULL) cblas_cdotc_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_zdotc_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_zdotc_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_zdotc_sub_p == NULL) cblas_zdotc_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_zdotc_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_zdotc_sub"); if (cblas_zdotc_sub_p != NULL) cblas_zdotc_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_cdotu_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void cblas_cdotu_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_cdotu_sub_p == NULL) cblas_cdotu_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_cdotu_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_cdotu_sub"); if (cblas_cdotu_sub_p != NULL) cblas_cdotu_sub_p(n, x, incx, y, incy, pres); } } -static void cblas_zdotu_sub_wrapper(const int n, const void *x, const int incx, const void *y, - const int incy, void *pres) { +static void 
cblas_zdotu_sub_wrapper(const int n, const void* x, const int incx, const void* y, + const int incy, void* pres) { if (cblas_library() != NULL) { if (cblas_zdotu_sub_p == NULL) cblas_zdotu_sub_p = - (void (*)(const int n, const void *x, const int incx, const void *y, const int incy, - void *pres))GET_FUNC(h_libcblas, "cblas_zdotu_sub"); + (void (*)(const int n, const void* x, const int incx, const void* y, const int incy, + void* pres))GET_FUNC(h_libcblas, "cblas_zdotu_sub"); if (cblas_zdotu_sub_p != NULL) cblas_zdotu_sub_p(n, x, incx, y, incy, pres); } } -static int cblas_isamax_wrapper(const int n, const float *x, const int incx) { +static int cblas_isamax_wrapper(const int n, const float* x, const int incx) { int isamax_res = 0; if (cblas_library() != NULL) { if (cblas_isamax_p == NULL) - cblas_isamax_p = (int (*)(const int n, const float *x, const int incx))GET_FUNC( + cblas_isamax_p = (int (*)(const int n, const float* x, const int incx))GET_FUNC( h_libcblas, "cblas_isamax"); if (cblas_isamax_p != NULL) isamax_res = cblas_isamax_p(n, x, incx); @@ -2376,11 +2376,11 @@ static int cblas_isamax_wrapper(const int n, const float *x, const int incx) { return isamax_res; } -static int cblas_idamax_wrapper(const int n, const double *x, const int incx) { +static int cblas_idamax_wrapper(const int n, const double* x, const int incx) { int idamax_res = 0; if (cblas_library() != NULL) { if (cblas_idamax_p == NULL) - cblas_idamax_p = (int (*)(const int n, const double *x, const int incx))GET_FUNC( + cblas_idamax_p = (int (*)(const int n, const double* x, const int incx))GET_FUNC( h_libcblas, "cblas_idamax"); if (cblas_idamax_p != NULL) idamax_res = cblas_idamax_p(n, x, incx); @@ -2388,11 +2388,11 @@ static int cblas_idamax_wrapper(const int n, const double *x, const int incx) { return idamax_res; } -static int cblas_icamax_wrapper(const int n, const void *x, const int incx) { +static int cblas_icamax_wrapper(const int n, const void* x, const int incx) { int icamax_res = 0; 
if (cblas_library() != NULL) { if (cblas_icamax_p == NULL) - cblas_icamax_p = (int (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_icamax_p = (int (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_icamax"); if (cblas_icamax_p != NULL) icamax_res = cblas_icamax_p(n, x, incx); @@ -2400,11 +2400,11 @@ static int cblas_icamax_wrapper(const int n, const void *x, const int incx) { return icamax_res; } -static int cblas_izamax_wrapper(const int n, const void *x, const int incx) { +static int cblas_izamax_wrapper(const int n, const void* x, const int incx) { int izamax_res = 0; if (cblas_library() != NULL) { if (cblas_izamax_p == NULL) - cblas_izamax_p = (int (*)(const int n, const void *x, const int incx))GET_FUNC( + cblas_izamax_p = (int (*)(const int n, const void* x, const int incx))GET_FUNC( h_libcblas, "cblas_izamax"); if (cblas_izamax_p != NULL) izamax_res = cblas_izamax_p(n, x, incx); diff --git a/tests/unit_tests/blas/include/test_common.hpp b/tests/unit_tests/blas/include/test_common.hpp index 5d607991e..0b64d3acc 100644 --- a/tests/unit_tests/blas/include/test_common.hpp +++ b/tests/unit_tests/blas/include/test_common.hpp @@ -86,7 +86,7 @@ constexpr T matrix_size(oneapi::mkl::layout layout, oneapi::mkl::transpose trans // SYCL buffer creation helper. 
template -sycl::buffer make_buffer(const vec &v) { +sycl::buffer make_buffer(const vec& v) { sycl::buffer buf(v.data(), sycl::range<1>(v.size())); return buf; } @@ -174,14 +174,14 @@ std::complex rand_scalar(int mag) { } template -void rand_vector(fp *v, int n, int inc) { +void rand_vector(fp* v, int n, int inc) { int abs_inc = std::abs(inc); for (int i = 0; i < n; i++) v[i * abs_inc] = rand_scalar(); } template -void rand_vector(vec &v, int n, int inc) { +void rand_vector(vec& v, int n, int inc) { using fp = typename vec::value_type; int abs_inc = std::abs(inc); @@ -209,7 +209,7 @@ oneapi::mkl::transpose rand_trans() { } template -void print_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld, char *name) { +void print_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld, char* name) { std::cout << "Matrix " << name << ":\n"; for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { @@ -223,15 +223,15 @@ void print_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld, ch } template -void copy_vector(fp *src, int n, int inc, fp *dest) { +void copy_vector(fp* src, int n, int inc, fp* dest) { int abs_inc = std::abs(inc); for (int i = 0; i < n; i++) dest[i * abs_inc] = src[i * abs_inc]; } template -void copy_matrix(vec_src &src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, - int n, int ld, vec_dest &dest) { +void copy_matrix(vec_src& src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, + int n, int ld, vec_dest& dest) { using T_data = typename vec_dest::value_type; dest.resize(matrix_size(layout, trans, m, n, ld)); if (((trans == oneapi::mkl::transpose::nontrans) && @@ -250,8 +250,8 @@ void copy_matrix(vec_src &src, oneapi::mkl::layout layout, oneapi::mkl::transpos } template -void copy_matrix(fp_src *src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, - int n, int ld, fp_dst *dest) { +void copy_matrix(fp_src* src, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, 
int m, + int n, int ld, fp_dst* dest) { if (((trans == oneapi::mkl::transpose::nontrans) && (layout == oneapi::mkl::layout::col_major)) || ((trans != oneapi::mkl::transpose::nontrans) && @@ -268,7 +268,7 @@ void copy_matrix(fp_src *src, oneapi::mkl::layout layout, oneapi::mkl::transpose } template -void rand_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld) { +void rand_matrix(vec& M, oneapi::mkl::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; M.resize(matrix_size(trans, m, n, ld)); @@ -286,7 +286,7 @@ void rand_matrix(vec &M, oneapi::mkl::transpose trans, int m, int n, int ld) { } template -void rand_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, +void rand_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; @@ -308,7 +308,7 @@ void rand_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::transpose tran } template -void rand_matrix(fp *M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, +void rand_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, int ld) { if (((trans == oneapi::mkl::transpose::nontrans) && (layout == oneapi::mkl::layout::col_major)) || @@ -326,7 +326,7 @@ void rand_matrix(fp *M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans } template -void rand_trsm_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, +void rand_trsm_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, int ld) { using fp = typename vec::value_type; @@ -356,7 +356,7 @@ void rand_trsm_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::transpose } template -void rand_trsm_matrix(fp *M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, +void rand_trsm_matrix(fp* M, oneapi::mkl::layout layout, oneapi::mkl::transpose trans, int m, int n, int ld) { if (((trans 
== oneapi::mkl::transpose::nontrans) && (layout == oneapi::mkl::layout::col_major)) || @@ -382,7 +382,7 @@ void rand_trsm_matrix(fp *M, oneapi::mkl::layout layout, oneapi::mkl::transpose } template -void rand_tpsv_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, +void rand_tpsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, int m) { using fp = typename vec::value_type; std::vector tmp; @@ -408,7 +408,7 @@ void rand_tpsv_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::uplo uppe } template -void rand_tbsv_matrix(vec &M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, +void rand_tbsv_matrix(vec& M, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, oneapi::mkl::transpose trans, int m, int k, int ld) { using fp = typename vec::value_type; std::vector tmp; @@ -461,7 +461,7 @@ typename std::enable_if::value, bool>::type check_equal(fp } template -bool check_equal_ptr(sycl::queue queue, fp *x, fp x_ref, int error_mag) { +bool check_equal_ptr(sycl::queue queue, fp* x, fp x_ref, int error_mag) { fp x_host; queue.memcpy(&x_host, x, sizeof(fp)).wait(); return check_equal(x_host, x_ref, error_mag); @@ -485,7 +485,7 @@ bool check_equal_trsm(fp x, fp x_ref, int error_mag) { } template -bool check_equal(fp x, fp x_ref, int error_mag, std::ostream &out) { +bool check_equal(fp x, fp x_ref, int error_mag, std::ostream& out) { bool good = check_equal(x, x_ref, error_mag); if (!good) { @@ -495,15 +495,15 @@ bool check_equal(fp x, fp x_ref, int error_mag, std::ostream &out) { } template -bool check_equal_ptr(sycl::queue queue, fp *x, fp x_ref, int error_mag, std::ostream &out) { +bool check_equal_ptr(sycl::queue queue, fp* x, fp x_ref, int error_mag, std::ostream& out) { fp x_host; queue.memcpy(&x_host, x, sizeof(fp)).wait(); return check_equal(x_host, x_ref, error_mag, out); } template -bool check_equal_vector(const fp *v, const fp *v_ref, int n, int inc, int error_mag, - 
std::ostream &out) { +bool check_equal_vector(const fp* v, const fp* v_ref, int n, int inc, int error_mag, + std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -523,7 +523,7 @@ bool check_equal_vector(const fp *v, const fp *v_ref, int n, int inc, int error_ } template -bool check_equal_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, std::ostream &out) { +bool check_equal_vector(vec1& v, vec2& v_ref, int n, int inc, int error_mag, std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -543,8 +543,8 @@ bool check_equal_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, std } template -bool check_equal_trsv_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag, - std::ostream &out) { +bool check_equal_trsv_vector(vec1& v, vec2& v_ref, int n, int inc, int error_mag, + std::ostream& out) { int abs_inc = std::abs(inc), count = 0; bool good = true; @@ -564,8 +564,8 @@ bool check_equal_trsv_vector(vec1 &v, vec2 &v_ref, int n, int inc, int error_mag } template -bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::mkl::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, int m, int n, int ld, + int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -586,8 +586,8 @@ bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::mkl::layout layout, int m, } template -bool check_equal_matrix(const fp *M, const fp *M_ref, oneapi::mkl::layout layout, int m, int n, - int ld, int error_mag, std::ostream &out) { +bool check_equal_matrix(const fp* M, const fp* M_ref, oneapi::mkl::layout layout, int m, int n, + int ld, int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -608,9 +608,9 @@ bool check_equal_matrix(const fp *M, const fp *M_ref, oneapi::mkl::layout layout } template -bool check_equal_matrix(acc1 &M, 
acc2 &M_ref, oneapi::mkl::layout layout, +bool check_equal_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int m, int n, int ld, int error_mag, - std::ostream &out) { + std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -634,8 +634,8 @@ bool check_equal_matrix(acc1 &M, acc2 &M_ref, oneapi::mkl::layout layout, } template -bool check_equal_trsm_matrix(acc1 &M, acc2 &M_ref, oneapi::mkl::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_equal_trsm_matrix(acc1& M, acc2& M_ref, oneapi::mkl::layout layout, int m, int n, int ld, + int error_mag, std::ostream& out) { bool good = true; int idx, count = 0; for (int j = 0; j < n; j++) { @@ -677,8 +677,8 @@ typename std::enable_if::value, bool>::type check_almost_eq } template -bool check_almost_equal_matrix_int(Ta &M, Tb &M_ref, oneapi::mkl::layout layout, int m, int n, - int ld, int error_mag, std::ostream &out) { +bool check_almost_equal_matrix_int(Ta& M, Tb& M_ref, oneapi::mkl::layout layout, int m, int n, + int ld, int error_mag, std::ostream& out) { static_assert(is_matrix_type_integral() && is_matrix_type_integral()); bool good = true; int idx, count = 0; @@ -700,8 +700,8 @@ bool check_almost_equal_matrix_int(Ta &M, Tb &M_ref, oneapi::mkl::layout layout, } template -bool check_almost_equal_matrix(Ta &M, Tb &M_ref, oneapi::mkl::layout layout, int m, int n, int ld, - int error_mag, std::ostream &out) { +bool check_almost_equal_matrix(Ta& M, Tb& M_ref, oneapi::mkl::layout layout, int m, int n, int ld, + int error_mag, std::ostream& out) { // Only call if returned dtype is integral if constexpr (is_matrix_type_integral() && is_matrix_type_integral()) return check_almost_equal_matrix_int(M, M_ref, layout, m, n, ld, error_mag, out); diff --git a/tests/unit_tests/blas/level1/axpby.cpp b/tests/unit_tests/blas/level1/axpby.cpp index d43f9beda..4234e5259 100644 --- a/tests/unit_tests/blas/level1/axpby.cpp +++ 
b/tests/unit_tests/blas/level1/axpby.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) { // Prepare data. vector x, y, y_ref; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpby(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + ::axpby(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPBY. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); @@ -109,16 +109,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPBY:\n" << error.what() << std::endl; } @@ -130,8 +130,8 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp return (int)good; } -class 
AxpbyTests - : public ::testing::TestWithParam> {}; +class AxpbyTests : public ::testing::TestWithParam> { +}; TEST_P(AxpbyTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level1/axpby_usm.cpp b/tests/unit_tests/blas/level1/axpby_usm.cpp index ae85ca8f1..1459f1900 100644 --- a/tests/unit_tests/blas/level1/axpby_usm.cpp +++ b/tests/unit_tests/blas/level1/axpby_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha, fp beta) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); @@ -79,8 +79,8 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpby(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + ::axpby(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPBY. 
@@ -113,16 +113,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPBY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPBY:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } class AxpbyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpbyUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level1/axpy.cpp b/tests/unit_tests/blas/level1/axpy.cpp index c81f2902d..a0fbdc4c6 100644 --- a/tests/unit_tests/blas/level1/axpy.cpp +++ b/tests/unit_tests/blas/level1/axpy.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) { // Prepare data. vector x, y, y_ref; @@ -58,18 +58,17 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpy(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y_ref.data(), - &incy_ref); + ::axpy(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPY. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); @@ -109,16 +108,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY:\n" << error.what() << std::endl; } @@ -130,7 +129,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp return (int)good; } -class AxpyTests : public ::testing::TestWithParam> { +class AxpyTests : public ::testing::TestWithParam> { }; TEST_P(AxpyTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/axpy_usm.cpp b/tests/unit_tests/blas/level1/axpy_usm.cpp index da68f173c..651b70a58 100644 --- a/tests/unit_tests/blas/level1/axpy_usm.cpp +++ b/tests/unit_tests/blas/level1/axpy_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp alpha) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); @@ -79,8 +79,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::axpy(&N_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y_ref.data(), - &incy_ref); + ::axpy(&N_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ AXPY. @@ -113,16 +112,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during AXPY:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of AXPY:\n" << error.what() << std::endl; } @@ -134,7 +133,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } class AxpyUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(AxpyUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level1/dotc.cpp b/tests/unit_tests/blas/level1/dotc.cpp index cb8d0fc37..f420a5e9f 100644 --- a/tests/unit_tests/blas/level1/dotc.cpp +++ b/tests/unit_tests/blas/level1/dotc.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { 
template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) { // Prepare data. vector x, y; fp result = 0.0, result_reference = 0.0; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotc((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotc((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTC. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); @@ -110,16 +110,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTC:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { return (int)good; } -class DotcTests : public ::testing::TestWithParam> { +class DotcTests : public ::testing::TestWithParam> { }; TEST_P(DotcTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/dotc_usm.cpp 
b/tests/unit_tests/blas/level1/dotc_usm.cpp index ad05c9d3b..9c08125f3 100644 --- a/tests/unit_tests/blas/level1/dotc_usm.cpp +++ b/tests/unit_tests/blas/level1/dotc_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); @@ -78,12 +78,12 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotc((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotc((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTC. 
- auto result_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + auto result_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); try { #ifdef CALL_RT_API @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTC:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { } class DotcUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DotcUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP( diff --git a/tests/unit_tests/blas/level1/dotu.cpp b/tests/unit_tests/blas/level1/dotu.cpp index bbef3ad8c..b6b3dd536 100644 --- a/tests/unit_tests/blas/level1/dotu.cpp +++ b/tests/unit_tests/blas/level1/dotu.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) { // Prepare data. 
vector x, y; fp result = 0.0, result_reference = 0.0; @@ -58,18 +58,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotu((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotu((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTU. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); @@ -110,16 +110,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTU:\n" << error.what() << std::endl; } @@ -131,7 +131,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { return (int)good; } -class DotuTests : public ::testing::TestWithParam> { +class DotuTests : public ::testing::TestWithParam> { }; TEST_P(DotuTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/dotu_usm.cpp b/tests/unit_tests/blas/level1/dotu_usm.cpp index 3f30bf5ff..6f7c4a63f 100644 --- a/tests/unit_tests/blas/level1/dotu_usm.cpp +++ b/tests/unit_tests/blas/level1/dotu_usm.cpp @@ -41,19 +41,19 @@ using namespace 
sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); @@ -78,12 +78,12 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::dotu((fp_ref *)&result_reference, &N_ref, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), + ::dotu((fp_ref*)&result_reference, &N_ref, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref); // Call DPC++ DOTU. 
- auto result_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + auto result_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); try { #ifdef CALL_RT_API @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during DOTU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of DOTU:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy) { } class DotuUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(DotuUsmTests, ComplexSinglePrecision) { EXPECT_TRUEORSKIP( diff --git a/tests/unit_tests/blas/level1/rot.cpp b/tests/unit_tests/blas/level1/rot.cpp index f65540182..12a26ce71 100644 --- a/tests/unit_tests/blas/level1/rot.cpp +++ b/tests/unit_tests/blas/level1/rot.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c, +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c, fp_scalar s) { // Prepare data. 
vector x, x_ref, y, y_ref; @@ -59,18 +59,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rot(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_scalar *)&c, (fp_scalar *)&s); + ::rot(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_scalar*)&c, (fp_scalar*)&s); // Call DPC++ ROT. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); @@ -110,16 +110,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROT:\n" << error.what() << std::endl; } @@ -135,8 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ return (int)good; } -class RotTests : public ::testing::TestWithParam> { -}; +class RotTests : public ::testing::TestWithParam> {}; TEST_P(RotTests, RealSinglePrecision) { float c(2.0); diff --git a/tests/unit_tests/blas/level1/rot_usm.cpp b/tests/unit_tests/blas/level1/rot_usm.cpp index 287ac285b..6c19b0ceb 100644 --- a/tests/unit_tests/blas/level1/rot_usm.cpp +++ b/tests/unit_tests/blas/level1/rot_usm.cpp @@ -41,20 
+41,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c, +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_scalar c, fp_scalar s) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); @@ -80,8 +80,8 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rot(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_scalar *)&c, (fp_scalar *)&s); + ::rot(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_scalar*)&c, (fp_scalar*)&s); // Call DPC++ ROT. 
@@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROT:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp_ } class RotUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotUsmTests, RealSinglePrecision) { float c(2.0); diff --git a/tests/unit_tests/blas/level1/rotg.cpp b/tests/unit_tests/blas/level1/rotg.cpp index 1a0d569d8..4abcddd39 100644 --- a/tests/unit_tests/blas/level1/rotg.cpp +++ b/tests/unit_tests/blas/level1/rotg.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Prepare data. fp a, b, s, a_ref, b_ref, s_ref; fp_scalar c, c_ref; @@ -64,17 +64,17 @@ int test(device *dev, oneapi::mkl::layout layout) { // Call Reference ROTG. using fp_ref = typename ref_type_info::type; - ::rotg((fp_ref *)&a_ref, (fp_ref *)&b_ref, (fp_scalar *)&c_ref, (fp_ref *)&s_ref); + ::rotg((fp_ref*)&a_ref, (fp_ref*)&b_ref, (fp_scalar*)&c_ref, (fp_ref*)&s_ref); // Call DPC++ ROTG. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); @@ -116,16 +116,16 @@ int test(device *dev, oneapi::mkl::layout layout) { } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTG:\n" << error.what() << std::endl; } @@ -144,7 +144,7 @@ int test(device *dev, oneapi::mkl::layout layout) { return (int)good; } -class RotgTests : public ::testing::TestWithParam> { +class RotgTests : public ::testing::TestWithParam> { }; TEST_P(RotgTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/rotg_usm.cpp b/tests/unit_tests/blas/level1/rotg_usm.cpp index de71a793d..d078ff03a 100644 --- a/tests/unit_tests/blas/level1/rotg_usm.cpp +++ b/tests/unit_tests/blas/level1/rotg_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); @@ -83,22 +83,22 @@ int test(device *dev, oneapi::mkl::layout layout) { // Call Reference ROTG. using fp_ref = typename ref_type_info::type; - ::rotg((fp_ref *)&a_ref, (fp_ref *)&b_ref, (fp_scalar *)&c_ref, (fp_ref *)&s_ref); + ::rotg((fp_ref*)&a_ref, (fp_ref*)&b_ref, (fp_scalar*)&c_ref, (fp_ref*)&s_ref); // Call DPC++ ROTG. fp *a_p, *b_p, *s_p; - fp_scalar *c_p; + fp_scalar* c_p; if constexpr (alloc_type == usm::alloc::shared) { - a_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); - b_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); - s_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); - c_p = (fp_scalar *)oneapi::mkl::malloc_shared(64, sizeof(fp_scalar), *dev, cxt); + a_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + b_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + s_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + c_p = (fp_scalar*)oneapi::mkl::malloc_shared(64, sizeof(fp_scalar), *dev, cxt); } else if constexpr (alloc_type == usm::alloc::device) { - a_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); - b_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); - s_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); - c_p = (fp_scalar *)oneapi::mkl::malloc_device(64, sizeof(fp_scalar), *dev, cxt); + a_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + b_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + s_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + c_p = (fp_scalar*)oneapi::mkl::malloc_device(64, sizeof(fp_scalar), 
*dev, cxt); } else { throw std::runtime_error("Bad alloc_type"); @@ -139,16 +139,16 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTG:\n" << error.what() << std::endl; } @@ -170,7 +170,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class RotgUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotgUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP((test(std::get<0>(GetParam()), std::get<1>(GetParam())))); diff --git a/tests/unit_tests/blas/level1/rotm.cpp b/tests/unit_tests/blas/level1/rotm.cpp index ab2c599bf..4e4ba44ec 100644 --- a/tests/unit_tests/blas/level1/rotm.cpp +++ b/tests/unit_tests/blas/level1/rotm.cpp @@ -41,12 +41,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) { // Prepare data. vector x, x_ref, y, y_ref; vector param; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rotm(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_ref *)param.data()); + ::rotm(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_ref*)param.data()); // Call DPC++ ROTM. 
// Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); @@ -113,16 +113,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTM:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp return (int)good; } -class RotmTests : public ::testing::TestWithParam> { +class RotmTests : public ::testing::TestWithParam> { }; TEST_P(RotmTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level1/rotm_usm.cpp b/tests/unit_tests/blas/level1/rotm_usm.cpp index 7723e096c..79ce634a1 100644 --- a/tests/unit_tests/blas/level1/rotm_usm.cpp +++ b/tests/unit_tests/blas/level1/rotm_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp flag) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); @@ -81,8 +81,8 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp using fp_ref = typename ref_type_info::type; const int N_ref = N, incx_ref = incx, incy_ref = incy; - ::rotm(&N_ref, (fp_ref *)x_ref.data(), &incx_ref, (fp_ref *)y_ref.data(), &incy_ref, - (fp_ref *)param.data()); + ::rotm(&N_ref, (fp_ref*)x_ref.data(), &incx_ref, (fp_ref*)y_ref.data(), &incy_ref, + (fp_ref*)param.data()); // Call DPC++ ROTM. @@ -115,16 +115,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTM:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTM:\n" << error.what() << std::endl; } @@ -138,7 +138,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, fp } class RotmUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotmUsmTests, RealSinglePrecision) { float flag(-1.0); diff --git a/tests/unit_tests/blas/level1/rotmg_usm.cpp b/tests/unit_tests/blas/level1/rotmg_usm.cpp index 92eeee491..0afe7caca 100644 --- a/tests/unit_tests/blas/level1/rotmg_usm.cpp +++ b/tests/unit_tests/blas/level1/rotmg_usm.cpp @@ -41,19 +41,19 @@ using namespace sycl; using std::vector; -extern std::vector devices; 
+extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout) { +int test(device* dev, oneapi::mkl::layout layout) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during ROTMG:\n" << e.what() << std::endl; print_error_code(e); @@ -82,14 +82,14 @@ int test(device *dev, oneapi::mkl::layout layout) { fp *d1_p, *d2_p, *x1_p; if constexpr (alloc_type == usm::alloc::device) { - d1_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); - d2_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); - x1_p = (fp *)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + d1_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + d2_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); + x1_p = (fp*)oneapi::mkl::malloc_device(64, sizeof(fp), *dev, cxt); } else if constexpr (alloc_type == usm::alloc::shared) { - d1_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); - d2_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); - x1_p = (fp *)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + d1_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + d2_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); + x1_p = (fp*)oneapi::mkl::malloc_shared(64, sizeof(fp), *dev, cxt); } else { throw std::runtime_error("Bad alloc_type"); @@ -101,7 +101,7 @@ int test(device *dev, oneapi::mkl::layout layout) { // Call Reference ROTMG. - ::rotmg(&d1_ref, &d2_ref, &x1_ref, &y1, (fp *)param_ref.data()); + ::rotmg(&d1_ref, &d2_ref, &x1_ref, &y1, (fp*)param_ref.data()); // Call DPC++ ROTMG. 
@@ -134,16 +134,16 @@ int test(device *dev, oneapi::mkl::layout layout) { main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during ROTMG:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of ROTMG:\n" << error.what() << std::endl; } @@ -213,7 +213,7 @@ int test(device *dev, oneapi::mkl::layout layout) { } class RotmgUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(RotmgUsmTests, RealSinglePrecision) { EXPECT_TRUEORSKIP(test(std::get<0>(GetParam()), std::get<1>(GetParam()))); diff --git a/tests/unit_tests/blas/level1/sdsdot.cpp b/tests/unit_tests/blas/level1/sdsdot.cpp index 7293a3699..1030713f0 100644 --- a/tests/unit_tests/blas/level1/sdsdot.cpp +++ b/tests/unit_tests/blas/level1/sdsdot.cpp @@ -41,11 +41,11 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) { // Prepare data. vector x, y; float result = float(-1), result_ref = float(-1); @@ -56,18 +56,18 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo // Call Reference SDSDOT. const int N_ref = N, incx_ref = incx, incy_ref = incy; - result_ref = ::sdsdot(&N_ref, (float *)&alpha, (float *)x.data(), &incx_ref, (float *)y.data(), - &incy_ref); + result_ref = + ::sdsdot(&N_ref, (float*)&alpha, (float*)x.data(), &incx_ref, (float*)y.data(), &incy_ref); // Call DPC++ SDSDOT. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); @@ -108,16 +108,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SDSDOT:\n" << error.what() << std::endl; } @@ -130,7 +130,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo } class SdsdotTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SdsdotTests, RealSinglePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); diff --git a/tests/unit_tests/blas/level1/sdsdot_usm.cpp b/tests/unit_tests/blas/level1/sdsdot_usm.cpp index a5740516c..ab0221754 100644 --- a/tests/unit_tests/blas/level1/sdsdot_usm.cpp +++ b/tests/unit_tests/blas/level1/sdsdot_usm.cpp @@ -41,18 +41,18 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { -int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) { +int test(device* dev, oneapi::mkl::layout layout, int N, int incx, int incy, float alpha) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); @@ -76,12 +76,12 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo // Call Reference SDSDOT. const int N_ref = N, incx_ref = incx, incy_ref = incy; - result_ref = ::sdsdot(&N_ref, (float *)&alpha, (float *)x.data(), &incx_ref, (float *)y.data(), - &incy_ref); + result_ref = + ::sdsdot(&N_ref, (float*)&alpha, (float*)x.data(), &incx_ref, (float*)y.data(), &incy_ref); // Call DPC++ SDSDOT. - auto result_p = (float *)oneapi::mkl::malloc_shared(64, sizeof(float), *dev, cxt); + auto result_p = (float*)oneapi::mkl::malloc_shared(64, sizeof(float), *dev, cxt); try { #ifdef CALL_RT_API @@ -113,16 +113,16 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SDSDOT:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SDSDOT:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, int N, int incx, int incy, flo } class SdsdotUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SdsdotUsmTests, RealSinglePrecision) { CHECK_DOUBLE_ON_DEVICE(std::get<0>(GetParam())); diff --git a/tests/unit_tests/blas/level2/gbmv.cpp b/tests/unit_tests/blas/level2/gbmv.cpp index 
94fcbc906..20bc75490 100644 --- a/tests/unit_tests/blas/level2/gbmv.cpp +++ b/tests/unit_tests/blas/level2/gbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. int x_len = outer_dimension(transa, m, n); @@ -66,18 +66,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, using fp_ref = typename ref_type_info::type; ::gbmv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, &kl_ref, - &ku_ref, (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + &ku_ref, (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GBMV. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -122,16 +122,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GBMV:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, return (int)good; } -class GbmvTests : public ::testing::TestWithParam> { +class GbmvTests : public ::testing::TestWithParam> { }; TEST_P(GbmvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/gbmv_usm.cpp b/tests/unit_tests/blas/level2/gbmv_usm.cpp index 9d92fcf7e..ea66daab4 100644 --- a/tests/unit_tests/blas/level2/gbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/gbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, int kl, int ku, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -86,8 +86,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, using fp_ref = typename ref_type_info::type; ::gbmv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, &kl_ref, - &ku_ref, (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + &ku_ref, (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GBMV. @@ -124,16 +124,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GBMV:\n" << error.what() << std::endl; } @@ -145,7 +145,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, } class GbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GbmvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/gemv.cpp b/tests/unit_tests/blas/level2/gemv.cpp index 3bfff4324..bd15ab54b 100644 --- a/tests/unit_tests/blas/level2/gemv.cpp +++ b/tests/unit_tests/blas/level2/gemv.cpp @@ 
-42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. int x_len = outer_dimension(transa, m, n); @@ -65,18 +65,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, using fp_ref = typename ref_type_info::type; ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GEMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -119,16 +119,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV:\n" << error.what() << std::endl; } @@ -139,7 +139,7 @@ int test(device *dev, 
oneapi::mkl::layout layout, oneapi::mkl::transpose transa, return (int)good; } -class GemvTests : public ::testing::TestWithParam> { +class GemvTests : public ::testing::TestWithParam> { }; TEST_P(GemvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/gemv_usm.cpp b/tests/unit_tests/blas/level2/gemv_usm.cpp index d1e726e38..a513ab149 100644 --- a/tests/unit_tests/blas/level2/gemv_usm.cpp +++ b/tests/unit_tests/blas/level2/gemv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, int m, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -85,8 +85,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, using fp_ref = typename ref_type_info::type; ::gemv(convert_to_cblas_layout(layout), convert_to_cblas_trans(transa), &m_ref, &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ GEMV. 
@@ -123,16 +123,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GEMV:\n" << error.what() << std::endl; } @@ -144,7 +144,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::transpose transa, } class GemvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GemvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/ger.cpp b/tests/unit_tests/blas/level2/ger.cpp index 3b32d2827..7610239ad 100644 --- a/tests/unit_tests/blas/level2/ger.cpp +++ b/tests/unit_tests/blas/level2/ger.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
@@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GER. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GER:\n" << error.what() << std::endl; } @@ -135,8 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in return (int)good; } -class GerTests : public ::testing::TestWithParam> { -}; +class GerTests : public ::testing::TestWithParam> {}; TEST_P(GerTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/ger_usm.cpp b/tests/unit_tests/blas/level2/ger_usm.cpp index 87087f026..c9bece6b8 100644 --- 
a/tests/unit_tests/blas/level2/ger_usm.cpp +++ b/tests/unit_tests/blas/level2/ger_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::ger(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GER. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GER:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } class GerUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GerUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/gerc.cpp b/tests/unit_tests/blas/level2/gerc.cpp index c19c9f029..e918bbf92 100644 --- a/tests/unit_tests/blas/level2/gerc.cpp +++ b/tests/unit_tests/blas/level2/gerc.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
@@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERC. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERC:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in return (int)good; } -class GercTests : public ::testing::TestWithParam> { +class GercTests : public ::testing::TestWithParam> { }; TEST_P(GercTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/gerc_usm.cpp b/tests/unit_tests/blas/level2/gerc_usm.cpp index b6473484d..c9f04060d 100644 --- 
a/tests/unit_tests/blas/level2/gerc_usm.cpp +++ b/tests/unit_tests/blas/level2/gerc_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::gerc(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERC. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERC:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERC:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } class GercUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GercUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/geru.cpp b/tests/unit_tests/blas/level2/geru.cpp index e0cb7c45d..23af195cf 100644 --- a/tests/unit_tests/blas/level2/geru.cpp +++ b/tests/unit_tests/blas/level2/geru.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
@@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERU. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERU:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in return (int)good; } -class GeruTests : public ::testing::TestWithParam> { +class GeruTests : public ::testing::TestWithParam> { }; TEST_P(GeruTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/geru_usm.cpp b/tests/unit_tests/blas/level2/geru_usm.cpp index 1e882bd97..31f4e2116 100644 --- 
a/tests/unit_tests/blas/level2/geru_usm.cpp +++ b/tests/unit_tests/blas/level2/geru_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, +int test(device* dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in const int m_ref = m, n_ref = n, incx_ref = incx, incy_ref = incy, lda_ref = lda; using fp_ref = typename ref_type_info::type; - ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref *)&alpha, (fp_ref *)x.data(), - &incx_ref, (fp_ref *)y.data(), &incy_ref, (fp_ref *)A_ref.data(), &lda_ref); + ::geru(convert_to_cblas_layout(layout), &m_ref, &n_ref, (fp_ref*)&alpha, (fp_ref*)x.data(), + &incx_ref, (fp_ref*)y.data(), &incy_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ GERU. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during GERU:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of GERU:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, int m, int n, fp alpha, int in } class GeruUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(GeruUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hbmv.cpp b/tests/unit_tests/blas/level2/hbmv.cpp index 119aef32a..aa2b51ffa 100644 --- a/tests/unit_tests/blas/level2/hbmv.cpp +++ b/tests/unit_tests/blas/level2/hbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. 
vector x, y, y_ref, A; @@ -63,18 +63,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HBMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -118,16 +118,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HBMV:\n" << error.what() << std::endl; } @@ -138,7 +138,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class HbmvTests : public ::testing::TestWithParam> { +class HbmvTests : public ::testing::TestWithParam> { }; TEST_P(HbmvTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hbmv_usm.cpp b/tests/unit_tests/blas/level2/hbmv_usm.cpp index 60305cb93..183dc9e28 100644 --- a/tests/unit_tests/blas/level2/hbmv_usm.cpp +++ 
b/tests/unit_tests/blas/level2/hbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -84,8 +84,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HBMV. 
@@ -122,16 +122,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HBMV:\n" << error.what() << std::endl; } @@ -143,7 +143,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class HbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HbmvUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hemv.cpp b/tests/unit_tests/blas/level2/hemv.cpp index 3636e3774..5e68db394 100644 --- a/tests/unit_tests/blas/level2/hemv.cpp +++ b/tests/unit_tests/blas/level2/hemv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. 
vector x, y, y_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hemv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HEMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -116,16 +116,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HEMV:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class HemvTests : public ::testing::TestWithParam> { +class HemvTests : public ::testing::TestWithParam> { }; TEST_P(HemvTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hemv_usm.cpp b/tests/unit_tests/blas/level2/hemv_usm.cpp index a1b8093fc..a5c20b4b9 100644 --- a/tests/unit_tests/blas/level2/hemv_usm.cpp +++ 
b/tests/unit_tests/blas/level2/hemv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hemv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HEMV. 
@@ -121,16 +121,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HEMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HEMV:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class HemvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HemvUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/her.cpp b/tests/unit_tests/blas/level2/her.cpp index 46ae9a879..8b0e77cf2 100644 --- a/tests/unit_tests/blas/level2/her.cpp +++ b/tests/unit_tests/blas/level2/her.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp_scalar alpha, int incx, int lda) { // Prepare data. vector x, A_ref, A; @@ -61,17 +61,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_scalar_mkl = typename ref_type_info::type; ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_mkl *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); @@ -111,16 +111,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER:\n" << error.what() << std::endl; } @@ -131,8 +131,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class HerTests : public ::testing::TestWithParam> { -}; +class HerTests : public ::testing::TestWithParam> {}; TEST_P(HerTests, ComplexSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/her2.cpp b/tests/unit_tests/blas/level2/her2.cpp index e98c5cc8b..9da2be96a 100644 --- a/tests/unit_tests/blas/level2/her2.cpp +++ b/tests/unit_tests/blas/level2/her2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
vector x, y, A_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::her2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); @@ -115,16 +115,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER2:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class Her2Tests : public ::testing::TestWithParam> { +class Her2Tests : public ::testing::TestWithParam> { }; TEST_P(Her2Tests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/her2_usm.cpp b/tests/unit_tests/blas/level2/her2_usm.cpp index c732331ee..6d65f18f4 100644 --- a/tests/unit_tests/blas/level2/her2_usm.cpp +++ b/tests/unit_tests/blas/level2/her2_usm.cpp 
@@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::her2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER2. 
@@ -121,16 +121,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER2:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class Her2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Her2UsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/her_usm.cpp b/tests/unit_tests/blas/level2/her_usm.cpp index 9e1f5099e..083bd4f28 100644 --- a/tests/unit_tests/blas/level2/her_usm.cpp +++ b/tests/unit_tests/blas/level2/her_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp_scalar alpha, int incx, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); @@ -82,7 +82,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_scalar_mkl = typename ref_type_info::type; ::her(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_mkl *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ HER. @@ -116,16 +116,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HER:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HER:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class HerUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HerUsmTests, ComplexSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/hpmv.cpp b/tests/unit_tests/blas/level2/hpmv.cpp index 69e6ea9b2..23f6c4d91 100644 --- a/tests/unit_tests/blas/level2/hpmv.cpp +++ b/tests/unit_tests/blas/level2/hpmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector 
devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Prepare data. vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hpmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HPMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPMV:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class HpmvTests : public ::testing::TestWithParam> { +class HpmvTests : public 
::testing::TestWithParam> { }; TEST_P(HpmvTests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hpmv_usm.cpp b/tests/unit_tests/blas/level2/hpmv_usm.cpp index 743194b18..3766c7e7d 100644 --- a/tests/unit_tests/blas/level2/hpmv_usm.cpp +++ b/tests/unit_tests/blas/level2/hpmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hpmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ HPMV. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class HpmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HpmvUsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hpr.cpp b/tests/unit_tests/blas/level2/hpr.cpp index b2e5548bd..ca79e335a 100644 --- a/tests/unit_tests/blas/level2/hpr.cpp +++ b/tests/unit_tests/blas/level2/hpr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp_scalar alpha, int incx) { // Prepare data. vector x, A_ref, A; @@ -61,17 +61,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_scalar_mkl = typename ref_type_info::type; ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_mkl *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ HPR. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); @@ -111,16 +111,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR:\n" << error.what() << std::endl; } @@ -131,8 +131,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class HprTests : public ::testing::TestWithParam> { -}; +class HprTests : public ::testing::TestWithParam> {}; TEST_P(HprTests, ComplexSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/hpr2.cpp b/tests/unit_tests/blas/level2/hpr2.cpp index e2b19e2fd..22701fe3d 100644 --- a/tests/unit_tests/blas/level2/hpr2.cpp +++ b/tests/unit_tests/blas/level2/hpr2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Prepare data. 
vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hpr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ HPR2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR2:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class Hpr2Tests : public ::testing::TestWithParam> { +class Hpr2Tests : public ::testing::TestWithParam> { }; TEST_P(Hpr2Tests, ComplexSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/hpr2_usm.cpp b/tests/unit_tests/blas/level2/hpr2_usm.cpp index 6dc60dbf6..392f9a74b 100644 --- a/tests/unit_tests/blas/level2/hpr2_usm.cpp +++ b/tests/unit_tests/blas/level2/hpr2_usm.cpp @@ -42,20 +42,20 @@ 
using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::hpr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ HPR2. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class Hpr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Hpr2UsmTests, ComplexSinglePrecision) { std::complex alpha(2.0, -0.5); diff --git a/tests/unit_tests/blas/level2/hpr_usm.cpp b/tests/unit_tests/blas/level2/hpr_usm.cpp index b90b0ee63..708018e6d 100644 --- a/tests/unit_tests/blas/level2/hpr_usm.cpp +++ b/tests/unit_tests/blas/level2/hpr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp_scalar alpha, int incx) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); @@ -82,7 +82,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_scalar_mkl = typename ref_type_info::type; ::hpr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_scalar_mkl *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_scalar_mkl*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ HPR. @@ -116,16 +116,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during HPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of HPR:\n" << error.what() << std::endl; } @@ -137,7 +137,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class HprUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(HprUsmTests, ComplexSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/sbmv.cpp b/tests/unit_tests/blas/level2/sbmv.cpp index c0347dfda..49df93ad1 100644 --- a/tests/unit_tests/blas/level2/sbmv.cpp +++ b/tests/unit_tests/blas/level2/sbmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { 
template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. vector x, y, y_ref, A; @@ -62,18 +62,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::sbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SBMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -116,16 +116,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SBMV:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class SbmvTests : public ::testing::TestWithParam> { +class SbmvTests : 
public ::testing::TestWithParam> { }; TEST_P(SbmvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/sbmv_usm.cpp b/tests/unit_tests/blas/level2/sbmv_usm.cpp index 4fb7d46ad..43093cb24 100644 --- a/tests/unit_tests/blas/level2/sbmv_usm.cpp +++ b/tests/unit_tests/blas/level2/sbmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, int k, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); @@ -83,8 +83,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::sbmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, &k_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SBMV. 
@@ -121,16 +121,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SBMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SBMV:\n" << error.what() << std::endl; } @@ -142,7 +142,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class SbmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SbmvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/spmv.cpp b/tests/unit_tests/blas/level2/spmv.cpp index 799e7d775..a2121fbac 100644 --- a/tests/unit_tests/blas/level2/spmv.cpp +++ b/tests/unit_tests/blas/level2/spmv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Prepare data. vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SPMV. 
// Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPMV:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class SpmvTests : public ::testing::TestWithParam> { +class SpmvTests : public ::testing::TestWithParam> { }; TEST_P(SpmvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/spmv_usm.cpp b/tests/unit_tests/blas/level2/spmv_usm.cpp index ae38ada4a..9dfe57383 100644 --- a/tests/unit_tests/blas/level2/spmv_usm.cpp +++ b/tests/unit_tests/blas/level2/spmv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spmv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), (fp_ref *)x.data(), &incx_ref, (fp_ref *)&beta, - (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), (fp_ref*)x.data(), &incx_ref, (fp_ref*)&beta, + (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SPMV. @@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class SpmvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SpmvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/spr.cpp b/tests/unit_tests/blas/level2/spr.cpp index 4e4b5d8a9..05b809f45 100644 --- a/tests/unit_tests/blas/level2/spr.cpp +++ b/tests/unit_tests/blas/level2/spr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; 
-extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx) { // Prepare data. vector x, A_ref, A; @@ -60,17 +60,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ SPR. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); @@ -110,16 +110,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR:\n" << error.what() << std::endl; } @@ -130,8 +130,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class SprTests : public ::testing::TestWithParam> { -}; +class SprTests : public ::testing::TestWithParam> {}; TEST_P(SprTests, RealSinglePrecision) { float alpha(2.0); 
diff --git a/tests/unit_tests/blas/level2/spr2.cpp b/tests/unit_tests/blas/level2/spr2.cpp index d9d00a4e8..bbb232f5c 100644 --- a/tests/unit_tests/blas/level2/spr2.cpp +++ b/tests/unit_tests/blas/level2/spr2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Prepare data. vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ SPR2. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR2:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class Spr2Tests : public ::testing::TestWithParam> { +class Spr2Tests : public ::testing::TestWithParam> { }; TEST_P(Spr2Tests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/spr2_usm.cpp b/tests/unit_tests/blas/level2/spr2_usm.cpp index 683288775..4a029015f 100644 --- a/tests/unit_tests/blas/level2/spr2_usm.cpp +++ b/tests/unit_tests/blas/level2/spr2_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data()); // Call DPC++ SPR2. @@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class Spr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Spr2UsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/spr_usm.cpp b/tests/unit_tests/blas/level2/spr_usm.cpp index 3a23a33b4..e81aa41d9 100644 --- a/tests/unit_tests/blas/level2/spr_usm.cpp +++ b/tests/unit_tests/blas/level2/spr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern 
std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); @@ -81,7 +81,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::spr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data()); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data()); // Call DPC++ SPR. 
@@ -115,16 +115,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SPR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SPR:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class SprUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SprUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/symv.cpp b/tests/unit_tests/blas/level2/symv.cpp index a22e48ff7..fb33d8914 100644 --- a/tests/unit_tests/blas/level2/symv.cpp +++ b/tests/unit_tests/blas/level2/symv.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Prepare data. 
vector x, y, y_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::symv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SYMV. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); @@ -115,16 +115,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYMV:\n" << error.what() << std::endl; } @@ -135,7 +135,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class SymvTests : public ::testing::TestWithParam> { +class SymvTests : public ::testing::TestWithParam> { }; TEST_P(SymvTests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/symv_usm.cpp b/tests/unit_tests/blas/level2/symv_usm.cpp index f33c0d25f..8cfff4f39 100644 --- a/tests/unit_tests/blas/level2/symv_usm.cpp +++ 
b/tests/unit_tests/blas/level2/symv_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, fp beta, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::symv(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)A.data(), &lda_ref, (fp_ref *)x.data(), &incx_ref, - (fp_ref *)&beta, (fp_ref *)y_ref.data(), &incy_ref); + (fp_ref*)&alpha, (fp_ref*)A.data(), &lda_ref, (fp_ref*)x.data(), &incx_ref, + (fp_ref*)&beta, (fp_ref*)y_ref.data(), &incy_ref); // Call DPC++ SYMV. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYMV:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYMV:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class SymvUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SymvUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/syr.cpp b/tests/unit_tests/blas/level2/syr.cpp index 6b305582b..f382749da 100644 --- a/tests/unit_tests/blas/level2/syr.cpp +++ b/tests/unit_tests/blas/level2/syr.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int lda) { // Prepare data. vector x, A_ref, A; @@ -60,17 +60,17 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::syr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR. // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); @@ -110,16 +110,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR:\n" << error.what() << std::endl; } @@ -130,8 +130,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class SyrTests : public ::testing::TestWithParam> { -}; +class SyrTests : public ::testing::TestWithParam> {}; TEST_P(SyrTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/syr2.cpp b/tests/unit_tests/blas/level2/syr2.cpp index 5da1e0106..ef96572e5 100644 --- a/tests/unit_tests/blas/level2/syr2.cpp +++ b/tests/unit_tests/blas/level2/syr2.cpp @@ -42,12 +42,12 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Prepare data. 
vector x, y, A_ref, A; @@ -61,18 +61,18 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::syr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR2. // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); @@ -114,16 +114,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR2:\n" << error.what() << std::endl; } @@ -134,7 +134,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, return (int)good; } -class Syr2Tests : public ::testing::TestWithParam> { +class Syr2Tests : public ::testing::TestWithParam> { }; TEST_P(Syr2Tests, RealSinglePrecision) { diff --git a/tests/unit_tests/blas/level2/syr2_usm.cpp b/tests/unit_tests/blas/level2/syr2_usm.cpp index a1e2cba7d..64db524f6 100644 --- a/tests/unit_tests/blas/level2/syr2_usm.cpp +++ b/tests/unit_tests/blas/level2/syr2_usm.cpp @@ 
-42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int incy, int lda) { // Catch asynchronous exceptions. auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); @@ -82,8 +82,8 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::syr2(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)y.data(), &incy_ref, - (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)y.data(), &incy_ref, + (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR2. 
@@ -120,16 +120,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR2:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR2:\n" << error.what() << std::endl; } @@ -141,7 +141,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class Syr2UsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(Syr2UsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/blas/level2/syr_usm.cpp b/tests/unit_tests/blas/level2/syr_usm.cpp index 5a9f5034d..c6b652d24 100644 --- a/tests/unit_tests/blas/level2/syr_usm.cpp +++ b/tests/unit_tests/blas/level2/syr_usm.cpp @@ -42,20 +42,20 @@ using namespace sycl; using std::vector; -extern std::vector devices; +extern std::vector devices; namespace { template -int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, +int test(device* dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, int n, fp alpha, int incx, int lda) { // Catch asynchronous exceptions. 
auto exception_handler = [](exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught asynchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); @@ -81,7 +81,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, using fp_ref = typename ref_type_info::type; ::syr(convert_to_cblas_layout(layout), convert_to_cblas_uplo(upper_lower), &n_ref, - (fp_ref *)&alpha, (fp_ref *)x.data(), &incx_ref, (fp_ref *)A_ref.data(), &lda_ref); + (fp_ref*)&alpha, (fp_ref*)x.data(), &incx_ref, (fp_ref*)A_ref.data(), &lda_ref); // Call DPC++ SYR. @@ -115,16 +115,16 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, main_queue.wait(); #endif } - catch (exception const &e) { + catch (exception const& e) { std::cout << "Caught synchronous SYCL exception during SYR:\n" << e.what() << std::endl; print_error_code(e); } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of SYR:\n" << error.what() << std::endl; } @@ -136,7 +136,7 @@ int test(device *dev, oneapi::mkl::layout layout, oneapi::mkl::uplo upper_lower, } class SyrUsmTests - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; TEST_P(SyrUsmTests, RealSinglePrecision) { float alpha(2.0); diff --git a/tests/unit_tests/dft/include/compute_inplace.hpp b/tests/unit_tests/dft/include/compute_inplace.hpp index 9cc161c34..95421a232 100644 --- a/tests/unit_tests/dft/include/compute_inplace.hpp +++ b/tests/unit_tests/dft/include/compute_inplace.hpp @@ -94,9 +94,12 @@ int DFT_Test::test_in_place_buffer() { auto acc_host = 
inout_buf.get_host_access(); auto ptr_host = reinterpret_cast(acc_host.get_pointer()); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - ptr_host + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - modified_strides_bwd, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::mkl::dft::domain::REAL > + (ptr_host + backward_distance * i, + out_host_ref.data() + ref_distance * i, sizes, + modified_strides_bwd, abs_error_margin, rel_error_margin, + std::cout)); } } @@ -188,10 +191,11 @@ int DFT_Test::test_in_place_USM() { .wait_and_throw(); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - reinterpret_cast(inout.data()) + backward_distance * i, - out_host_ref.data() + ref_distance * i, sizes, modified_strides_bwd, abs_error_margin, - rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::mkl::dft::domain::REAL > + (reinterpret_cast(inout.data()) + backward_distance * i, + out_host_ref.data() + ref_distance * i, sizes, modified_strides_bwd, + abs_error_margin, rel_error_margin, std::cout)); } sycl::event done = diff --git a/tests/unit_tests/dft/include/compute_out_of_place.hpp b/tests/unit_tests/dft/include/compute_out_of_place.hpp index bcfd09dda..0d2041dc1 100644 --- a/tests/unit_tests/dft/include/compute_out_of_place.hpp +++ b/tests/unit_tests/dft/include/compute_out_of_place.hpp @@ -77,9 +77,11 @@ int DFT_Test::test_out_of_place_buffer() { auto acc_bwd = bwd_buf.get_host_access(); auto bwd_ptr = acc_bwd.get_pointer(); for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::mkl::dft::domain::REAL > + (bwd_ptr + backward_distance * i, + out_host_ref.data() + ref_distance * i, 
sizes, strides_bwd_cpy, + abs_error_margin, rel_error_margin, std::cout)); } } @@ -90,7 +92,7 @@ int DFT_Test::test_out_of_place_buffer() { // account for scaling that occurs during DFT std::for_each(input.begin(), input.end(), - [this](auto &x) { x *= static_cast(forward_elements); }); + [this](auto& x) { x *= static_cast(forward_elements); }); for (std::int64_t i = 0; i < batches; i++) { EXPECT_TRUE(check_equal_strided( @@ -164,9 +166,10 @@ int DFT_Test::test_out_of_place_USM() { auto bwd_ptr = &bwd[0]; for (std::int64_t i = 0; i < batches; i++) { - EXPECT_TRUE(check_equal_strided( - bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, sizes, - strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); + EXPECT_TRUE(check_equal_strided < domain == + oneapi::mkl::dft::domain::REAL > + (bwd_ptr + backward_distance * i, out_host_ref.data() + ref_distance * i, + sizes, strides_bwd_cpy, abs_error_margin, rel_error_margin, std::cout)); } oneapi::mkl::dft::compute_backward, FwdOutputType, @@ -176,7 +179,7 @@ int DFT_Test::test_out_of_place_USM() { // account for scaling that occurs during DFT std::for_each(input.begin(), input.end(), - [this](auto &x) { x *= static_cast(forward_elements); }); + [this](auto& x) { x *= static_cast(forward_elements); }); for (std::int64_t i = 0; i < batches; i++) { EXPECT_TRUE(check_equal_strided( diff --git a/tests/unit_tests/dft/include/reference_dft.hpp b/tests/unit_tests/dft/include/reference_dft.hpp index 236edc7b0..7114306c6 100644 --- a/tests/unit_tests/dft/include/reference_dft.hpp +++ b/tests/unit_tests/dft/include/reference_dft.hpp @@ -32,7 +32,7 @@ namespace detail { using ref_t = long double; /* Do the calculations using long double */ template -void reference_forward_dft_impl(const TypeIn *in, TypeOut *out, std::size_t N, std::size_t stride) { +void reference_forward_dft_impl(const TypeIn* in, TypeOut* out, std::size_t N, std::size_t stride) { static_assert(is_complex(), "Output type of DFT must be 
complex"); constexpr ref_t TWOPI = 2.0L * 3.141592653589793238462643383279502884197L; @@ -54,14 +54,14 @@ struct reference {}; template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { reference_forward_dft_impl(in, out, sizes[0], 1); } }; template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { const auto elements = std::accumulate(sizes.begin(), sizes.end(), 1U, std::multiplies<>{}); std::vector> tmp(elements); for (std::size_t i = 0; i < elements; i += sizes[1]) { @@ -75,7 +75,7 @@ struct reference { template struct reference { - static void forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { + static void forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { const auto elements = std::accumulate(sizes.begin(), sizes.end(), 1U, std::multiplies<>{}); std::vector> tmp1(elements); std::vector> tmp2(elements); @@ -112,7 +112,7 @@ struct reference { * @param stride the stride between elements in the data set, measured in elements. 
**/ template -void reference_forward_dft(const std::vector &sizes, const TypeIn *in, TypeOut *out) { +void reference_forward_dft(const std::vector& sizes, const TypeIn* in, TypeOut* out) { std::vector unsigned_sizes(sizes.size()); std::transform(sizes.begin(), sizes.end(), unsigned_sizes.begin(), [](std::int64_t size) { return cast_unsigned(size); }); diff --git a/tests/unit_tests/dft/include/test_common.hpp b/tests/unit_tests/dft/include/test_common.hpp index b13723105..4410bdeb2 100644 --- a/tests/unit_tests/dft/include/test_common.hpp +++ b/tests/unit_tests/dft/include/test_common.hpp @@ -58,7 +58,7 @@ inline std::size_t cast_unsigned(std::int64_t i) { } template -bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std::ostream &out) { +bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std::ostream& out) { using fp_real = typename complex_info::real_type; static_assert(std::is_floating_point_v, "Expected floating-point real or complex type."); @@ -88,8 +88,8 @@ bool check_equal(fp x, fp x_ref, double abs_error_mag, double rel_error_mag, std } template -bool check_equal_vector(vec1 &&v, vec2 &&v_ref, std::size_t n, double abs_error_mag, - double rel_error_mag, std::ostream &out) { +bool check_equal_vector(vec1&& v, vec2&& v_ref, std::size_t n, double abs_error_mag, + double rel_error_mag, std::ostream& out) { constexpr int max_print = 20; int count = 0; bool good = true; @@ -131,7 +131,7 @@ inline t rand_scalar() { } template -void rand_vector(vec &v, std::size_t n) { +void rand_vector(vec& v, std::size_t n) { using fp = typename vec::value_type; v.resize(n); for (std::size_t i = 0; i < n; i++) { @@ -141,7 +141,7 @@ void rand_vector(vec &v, std::size_t n) { // Catch asynchronous exceptions. 
auto exception_handler = [](sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } @@ -153,7 +153,7 @@ auto exception_handler = [](sycl::exception_list exceptions) { }; template -void commit_descriptor(oneapi::mkl::dft::descriptor &descriptor, +void commit_descriptor(oneapi::mkl::dft::descriptor& descriptor, sycl::queue queue) { #ifdef CALL_RT_API descriptor.commit(queue); @@ -164,7 +164,7 @@ void commit_descriptor(oneapi::mkl::dft::descriptor &descript // is it assumed that the unused elements of the array are ignored inline std::array get_conjugate_even_complex_strides( - const std::vector &sizes) { + const std::vector& sizes) { switch (sizes.size()) { case 1: return { 0, 1 }; case 2: return { 0, sizes[1] / 2 + 1, 1 }; @@ -178,7 +178,7 @@ inline std::array get_conjugate_even_complex_strides( } // is it assumed that the unused elements of the array are ignored -inline std::array get_default_strides(const std::vector &sizes) { +inline std::array get_default_strides(const std::vector& sizes) { if (sizes.size() > 3) { throw oneapi::mkl::unimplemented( "dft/test_common", __FUNCTION__, @@ -207,8 +207,8 @@ T get_default(const std::vector vec, std::size_t idx, T default_) { template std::pair get_default_distances( - const std::vector &sizes, const std::vector &strides_fwd, - const std::vector &strides_bwd) { + const std::vector& sizes, const std::vector& strides_fwd, + const std::vector& strides_bwd) { std::int64_t size0 = sizes[0]; std::int64_t size1 = get_default(sizes, 1, 1l); std::int64_t size2 = get_default(sizes, 2, 1l); @@ -241,8 +241,8 @@ std::pair get_default_distances( //up to 3 dimensions, empty strides = default template > std::vector strided_copy( - const T_vec &contiguous, const std::vector &sizes, - const std::vector &strides, std::int64_t batches, std::int64_t distance, + const T_vec& contiguous, const std::vector& sizes, + const 
std::vector& strides, std::int64_t batches, std::int64_t distance, Allocator alloc = {}) { if (strides.size() == 0) { return { contiguous.begin(), contiguous.end(), alloc }; @@ -273,9 +273,9 @@ std::vector strided_copy( //up to 3 dimensions, empty strides = default template -bool check_equal_strided(const vec1 &v, const vec2 &v_ref, std::vector sizes, +bool check_equal_strided(const vec1& v, const vec2& v_ref, std::vector sizes, std::vector strides, double abs_error_mag, double rel_error_mag, - std::ostream &out) { + std::ostream& out) { if (strides.size() == 0) { std::array strides_arr; if constexpr (ConjugateEvenStrides) { @@ -344,8 +344,7 @@ struct DFTParams { class DFTParamsPrint { public: - std::string operator()( - testing::TestParamInfo> dev) const { + std::string operator()(testing::TestParamInfo> dev) const { auto [device, params] = dev.param; std::string info_name; @@ -377,7 +376,7 @@ class DFTParamsPrint { info_name.append("_batches_").append(std::to_string(params.batches)); std::string dev_name = device->get_info(); - std::for_each(dev_name.begin(), dev_name.end(), [](auto &c) { + std::for_each(dev_name.begin(), dev_name.end(), [](auto& c) { if (!isalnum(c)) c = '_'; }); diff --git a/tests/unit_tests/dft/source/compute_tests.cpp b/tests/unit_tests/dft/source/compute_tests.cpp index 7f737ef50..eb1d88e7a 100644 --- a/tests/unit_tests/dft/source/compute_tests.cpp +++ b/tests/unit_tests/dft/source/compute_tests.cpp @@ -35,27 +35,27 @@ #include "compute_out_of_place.hpp" #include "compute_out_of_place_real_real.hpp" -extern std::vector devices; +extern std::vector devices; namespace { class ComputeTests_in_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_in_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_out_of_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class 
ComputeTests_real_real_out_of_place_COMPLEX - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_in_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_in_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_out_of_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; class ComputeTests_real_real_out_of_place_REAL - : public ::testing::TestWithParam> {}; + : public ::testing::TestWithParam> {}; #define INSTANTIATE_TEST(PRECISION, DOMAIN, PLACE, LAYOUT, STORAGE) \ TEST_P(ComputeTests##_##LAYOUT##PLACE##_##DOMAIN, \ diff --git a/tests/unit_tests/include/test_helper.hpp b/tests/unit_tests/include/test_helper.hpp index d4f79c5ce..c6eaf3421 100644 --- a/tests/unit_tests/include/test_helper.hpp +++ b/tests/unit_tests/include/test_helper.hpp @@ -271,11 +271,11 @@ } \ } while (0); -void print_error_code(sycl::exception const &e); +void print_error_code(sycl::exception const& e); class DeviceNamePrint { public: - std::string operator()(testing::TestParamInfo dev) const { + std::string operator()(testing::TestParamInfo dev) const { std::string dev_name = dev.param->get_info(); for (std::string::size_type i = 0; i < dev_name.size(); ++i) { if (!isalnum(dev_name[i])) @@ -290,7 +290,7 @@ class DeviceNamePrint { class LayoutDeviceNamePrint { public: std::string operator()( - testing::TestParamInfo> dev) const { + testing::TestParamInfo> dev) const { std::string layout_name = std::get<1>(dev.param) == oneapi::mkl::layout::col_major ? 
"Column_Major" : "Row_Major"; std::string dev_name = std::get<0>(dev.param)->get_info(); @@ -308,7 +308,7 @@ class LayoutDeviceNamePrint { namespace oneapi { namespace mkl { -static inline void *aligned_alloc(size_t align, size_t size) { +static inline void* aligned_alloc(size_t align, size_t size) { #ifdef _WIN64 return ::_aligned_malloc(size, align); #else @@ -316,7 +316,7 @@ static inline void *aligned_alloc(size_t align, size_t size) { #endif } -static inline void aligned_free(void *p) { +static inline void aligned_free(void* p) { #ifdef _WIN64 ::_aligned_free(p); #else @@ -325,7 +325,7 @@ static inline void aligned_free(void *p) { } /* Support for Unified Shared Memory allocations for different backends */ -static inline void *malloc_shared(size_t align, size_t size, sycl::device dev, sycl::context ctx) { +static inline void* malloc_shared(size_t align, size_t size, sycl::device dev, sycl::context ctx) { (void)align; #ifdef _WIN64 return sycl::malloc_shared(size, dev, ctx); @@ -339,7 +339,7 @@ static inline void *malloc_shared(size_t align, size_t size, sycl::device dev, s #endif } -static inline void *malloc_device(size_t align, size_t size, sycl::device dev, sycl::context ctx) { +static inline void* malloc_device(size_t align, size_t size, sycl::device dev, sycl::context ctx) { (void)align; #ifdef _WIN64 return sycl::malloc_device(size, dev, ctx); @@ -353,11 +353,11 @@ static inline void *malloc_device(size_t align, size_t size, sycl::device dev, s #endif } -static inline void free_shared(void *p, sycl::context ctx) { +static inline void free_shared(void* p, sycl::context ctx) { sycl::free(p, ctx); } -static inline void free_usm(void *p, sycl::context ctx) { +static inline void free_usm(void* p, sycl::context ctx) { sycl::free(p, ctx); } diff --git a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp index cb09ec16a..07ce554e8 100644 --- 
a/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp +++ b/tests/unit_tests/lapack/include/lapack_reference_wrappers.hpp @@ -62,7 +62,7 @@ inline CBLAS_DIAG cblas_diag(oneapi::mkl::diag d) { return CblasUnit; return CblasNonUnit; } -inline CBLAS_SIDE cblas_side(const char *c) { +inline CBLAS_SIDE cblas_side(const char* c) { return *c == 'R' || *c == 'r' ? CblasRight : CblasLeft; } inline CBLAS_SIDE cblas_side(oneapi::mkl::side s) { @@ -150,142 +150,142 @@ inline char to_char(oneapi::mkl::generate v) { } inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n, - int64_t k, float alpha, const float *a, int64_t lda, const float *b, int64_t ldb, - float beta, float *c, int64_t ldc) { + int64_t k, float alpha, const float* a, int64_t lda, const float* b, int64_t ldb, + float beta, float* c, int64_t ldc) { cblas_sgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n, - int64_t k, double alpha, const double *a, int64_t lda, const double *b, - int64_t ldb, double beta, double *c, int64_t ldc) { + int64_t k, double alpha, const double* a, int64_t lda, const double* b, + int64_t ldb, double beta, double* c, int64_t ldc) { cblas_dgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); } inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc) { - cblas_cgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void *)&alpha, - (void *)a, lda, (void *)(b), ldb, (void *)&beta, (void *)c, ldc); + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, 
std::complex beta, + std::complex* c, int64_t ldc) { + cblas_cgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha, + (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc); } inline void gemm(oneapi::mkl::transpose transa, oneapi::mkl::transpose transb, int64_t m, int64_t n, - int64_t k, std::complex alpha, const std::complex *a, int64_t lda, - const std::complex *b, int64_t ldb, std::complex beta, - std::complex *c, int64_t ldc) { - cblas_zgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void *)&alpha, - (void *)a, lda, (void *)(b), ldb, (void *)&beta, (void *)c, ldc); + int64_t k, std::complex alpha, const std::complex* a, int64_t lda, + const std::complex* b, int64_t ldb, std::complex beta, + std::complex* c, int64_t ldc) { + cblas_zgemm(CblasColMajor, cblas_trans(transa), cblas_trans(transb), m, n, k, (void*)&alpha, + (void*)a, lda, (void*)(b), ldb, (void*)&beta, (void*)c, ldc); } -inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float *a, int64_t lda, - float *w) { +inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, + float* w) { return LAPACKE_ssyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w); } -inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double *a, int64_t lda, - double *w) { +inline int64_t syevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, + double* w) { return LAPACKE_dsyevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, a, lda, w); } -inline int64_t sygvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float *a, - int64_t lda, float *b, int64_t ldb, float *w) { +inline int64_t sygvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, float* a, + int64_t lda, float* b, int64_t ldb, float* w) { return LAPACKE_ssygvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, a, lda, b, ldb, w); } -inline int64_t sygvd(int64_t 
itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double *a, - int64_t lda, double *b, int64_t ldb, double *w) { +inline int64_t sygvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, double* a, + int64_t lda, double* b, int64_t ldb, double* w) { return LAPACKE_dsygvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, a, lda, b, ldb, w); } inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, float alpha, - const float *a, int64_t lda, float beta, float *c, int64_t ldc) { + const float* a, int64_t lda, float beta, float* c, int64_t ldc) { cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, double alpha, - const double *a, int64_t lda, double beta, double *c, int64_t ldc) { + const double* a, int64_t lda, double beta, double* c, int64_t ldc) { cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc) { - cblas_csyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void *)&alpha, a, lda, - (void *)&beta, (void *)c, ldc); + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, std::complex* c, int64_t ldc) { + cblas_csyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda, + (void*)&beta, (void*)c, ldc); } inline void syrk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - std::complex alpha, const std::complex *a, int64_t lda, - std::complex beta, std::complex *c, int64_t ldc) { - cblas_zsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void *)&alpha, a, lda, - (void *)&beta, (void *)c, ldc); + std::complex alpha, const std::complex* a, int64_t lda, + std::complex beta, 
std::complex* c, int64_t ldc) { + cblas_zsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, (void*)&alpha, a, lda, + (void*)&beta, (void*)c, ldc); } inline void herk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, float alpha, - const std::complex *a, int64_t lda, float beta, std::complex *c, + const std::complex* a, int64_t lda, float beta, std::complex* c, int64_t ldc) { - cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void herk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, double alpha, - const std::complex *a, int64_t lda, double beta, std::complex *c, + const std::complex* a, int64_t lda, double beta, std::complex* c, int64_t ldc) { - cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - float alpha, const float *a, int64_t lda, float beta, float *c, int64_t ldc) { + float alpha, const float* a, int64_t lda, float beta, float* c, int64_t ldc) { cblas_ssyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - double alpha, const double *a, int64_t lda, double beta, double *c, + double alpha, const double* a, int64_t lda, double beta, double* c, int64_t ldc) { cblas_dsyrk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, c, ldc); } inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - float alpha, const std::complex *a, int64_t lda, float beta, - std::complex *c, int64_t ldc) { - cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, 
k, alpha, a, lda, beta, (void *)c, + float alpha, const std::complex* a, int64_t lda, float beta, + std::complex* c, int64_t ldc) { + cblas_cherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void sy_he_rk(oneapi::mkl::uplo u, oneapi::mkl::transpose t, int64_t n, int64_t k, - double alpha, const std::complex *a, int64_t lda, double beta, - std::complex *c, int64_t ldc) { - cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void *)c, + double alpha, const std::complex* a, int64_t lda, double beta, + std::complex* c, int64_t ldc) { + cblas_zherk(CblasColMajor, cblas_uplo(u), cblas_trans(t), n, k, alpha, a, lda, beta, (void*)c, ldc); } inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa, - oneapi::mkl::diag diag, int64_t m, int64_t n, float alpha, const float *a, - int64_t lda, float *b, int64_t ldb) { + oneapi::mkl::diag diag, int64_t m, int64_t n, float alpha, const float* a, + int64_t lda, float* b, int64_t ldb) { cblas_strmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), cblas_diag(diag), m, n, alpha, a, lda, b, ldb); } inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa, - oneapi::mkl::diag diag, int64_t m, int64_t n, double alpha, const double *a, - int64_t lda, double *b, int64_t ldb) { + oneapi::mkl::diag diag, int64_t m, int64_t n, double alpha, const double* a, + int64_t lda, double* b, int64_t ldb) { cblas_dtrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), cblas_diag(diag), m, n, alpha, a, lda, b, ldb); } inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa, oneapi::mkl::diag diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { cblas_ctrmm(CblasColMajor, 
cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), - cblas_diag(diag), m, n, (void *)&alpha, (void *)(a), lda, (void *)(b), ldb); + cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb); } inline void trmm(oneapi::mkl::side side, oneapi::mkl::uplo uplo, oneapi::mkl::transpose transa, oneapi::mkl::diag diag, int64_t m, int64_t n, std::complex alpha, - const std::complex *a, int64_t lda, std::complex *b, int64_t ldb) { + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { cblas_ztrmm(CblasColMajor, cblas_side(side), cblas_uplo(uplo), cblas_trans(transa), - cblas_diag(diag), m, n, (void *)&alpha, (void *)(a), lda, (void *)(b), ldb); + cblas_diag(diag), m, n, (void*)&alpha, (void*)(a), lda, (void*)(b), ldb); } -inline void swap(int64_t n, float *X, int64_t incX, float *Y, int64_t incY) { +inline void swap(int64_t n, float* X, int64_t incX, float* Y, int64_t incY) { cblas_sswap(n, X, incX, Y, incY); } -inline void swap(int64_t n, double *X, int64_t incX, double *Y, int64_t incY) { +inline void swap(int64_t n, double* X, int64_t incX, double* Y, int64_t incY) { cblas_dswap(n, X, incX, Y, incY); } -inline void swap(int64_t n, std::complex *X, int64_t incX, std::complex *Y, +inline void swap(int64_t n, std::complex* X, int64_t incX, std::complex* Y, int64_t incY) { - cblas_cswap(n, (void *)X, incX, (void *)Y, incY); + cblas_cswap(n, (void*)X, incX, (void*)Y, incY); } -inline void swap(int64_t n, std::complex *X, int64_t incX, std::complex *Y, +inline void swap(int64_t n, std::complex* X, int64_t incX, std::complex* Y, int64_t incY) { - cblas_zswap(n, (void *)X, incX, (void *)Y, incY); + cblas_zswap(n, (void*)X, incX, (void*)Y, incY); } template @@ -299,608 +299,607 @@ inline double lamch(char cmach) { return LAPACKE_dlamch(cmach); } -inline float lange(char norm, int64_t m, int64_t n, const std::complex *a, int64_t lda) { +inline float lange(char norm, int64_t m, int64_t n, const std::complex* a, int64_t lda) { return 
LAPACKE_clange(LAPACK_COL_MAJOR, norm, m, n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lange(char norm, int64_t m, int64_t n, const double *a, int64_t lda) { +inline double lange(char norm, int64_t m, int64_t n, const double* a, int64_t lda) { return LAPACKE_dlange(LAPACK_COL_MAJOR, norm, m, n, a, lda); } -inline float lange(char norm, int64_t m, int64_t n, const float *a, int64_t lda) { +inline float lange(char norm, int64_t m, int64_t n, const float* a, int64_t lda) { return LAPACKE_slange(LAPACK_COL_MAJOR, norm, m, n, a, lda); } -inline double lange(char norm, int64_t m, int64_t n, const std::complex *a, int64_t lda) { +inline double lange(char norm, int64_t m, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlange(LAPACK_COL_MAJOR, norm, m, n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline float lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex *a, +inline float lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_clanhe(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex *a, +inline double lanhe(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlanhe(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex *a, +inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_clansy(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const double *a, int64_t lda) { +inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const double* a, int64_t lda) { return 
LAPACKE_dlansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda); } -inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const float *a, int64_t lda) { +inline float lansy(char norm, oneapi::mkl::uplo u, int64_t n, const float* a, int64_t lda) { return LAPACKE_slansy(LAPACK_COL_MAJOR, norm, to_char(u), n, a, lda); } -inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex *a, +inline double lansy(char norm, oneapi::mkl::uplo u, int64_t n, const std::complex* a, int64_t lda) { return LAPACKE_zlansy(LAPACK_COL_MAJOR, norm, to_char(u), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_clacpy(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const double *a, int64_t lda, double *b, +inline int64_t lacpy(char u, int64_t m, int64_t n, const double* a, int64_t lda, double* b, int64_t ldb) { return LAPACKE_dlacpy(LAPACK_COL_MAJOR, u, m, n, a, lda, b, ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const float *a, int64_t lda, float *b, +inline int64_t lacpy(char u, int64_t m, int64_t n, const float* a, int64_t lda, float* b, int64_t ldb) { return LAPACKE_slacpy(LAPACK_COL_MAJOR, u, m, n, a, lda, b, ldb); } -inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { +inline int64_t lacpy(char u, int64_t m, int64_t n, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_zlacpy(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline 
int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex *a, - int64_t lda, std::complex *b, int64_t ldb) { +inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex* a, + int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_clacpy(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const double *a, int64_t lda, - double *b, int64_t ldb) { +inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const double* a, int64_t lda, + double* b, int64_t ldb) { return LAPACKE_dlacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb); } -inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const float *a, int64_t lda, - float *b, int64_t ldb) { +inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const float* a, int64_t lda, + float* b, int64_t ldb) { return LAPACKE_slacpy(LAPACK_COL_MAJOR, to_char(u), m, n, a, lda, b, ldb); } -inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex *a, - int64_t lda, std::complex *b, int64_t ldb) { +inline int64_t lacpy(oneapi::mkl::uplo u, int64_t m, int64_t n, const std::complex* a, + int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_zlacpy(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { + std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_claset(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, 
double alpha, double beta, - double *a, int64_t lda) { + double* a, int64_t lda) { return LAPACKE_dlaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda); } -inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, float alpha, float beta, float *a, +inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, float alpha, float beta, float* a, int64_t lda) { return LAPACKE_slaset(LAPACK_COL_MAJOR, to_char(u), m, n, alpha, beta, a, lda); } inline int64_t laset(oneapi::mkl::uplo u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { + std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_zlaset(LAPACK_COL_MAJOR, to_char(u), m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } inline int64_t laset(char u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { - return LAPACKE_claset(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + std::complex beta, std::complex* a, int64_t lda) { + return LAPACKE_claset(LAPACK_COL_MAJOR, u, m, n, reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } -inline int64_t laset(char u, int64_t m, int64_t n, double alpha, double beta, double *a, +inline int64_t laset(char u, int64_t m, int64_t n, double alpha, double beta, double* a, int64_t lda) { return LAPACKE_dlaset(LAPACK_COL_MAJOR, u, m, n, alpha, beta, a, lda); } -inline int64_t laset(char u, int64_t m, int64_t n, float alpha, float beta, float *a, int64_t lda) { +inline int64_t laset(char u, int64_t m, int64_t n, float alpha, float beta, float* a, int64_t lda) { return LAPACKE_slaset(LAPACK_COL_MAJOR, u, m, n, alpha, beta, a, lda); } inline int64_t laset(char u, int64_t m, int64_t n, std::complex alpha, - std::complex beta, std::complex *a, int64_t lda) { + 
std::complex beta, std::complex* a, int64_t lda) { return LAPACKE_zlaset(LAPACK_COL_MAJOR, u, m, n, - reinterpret_cast(alpha), - reinterpret_cast(beta), - reinterpret_cast(a), lda); + reinterpret_cast(alpha), + reinterpret_cast(beta), + reinterpret_cast(a), lda); } -inline int64_t gebrd(int64_t m, int64_t n, std::complex *a, int64_t lda, float *d, float *e, - std::complex *tauq, std::complex *taup) { - return LAPACKE_cgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - d, e, reinterpret_cast(tauq), - reinterpret_cast(taup)); +inline int64_t gebrd(int64_t m, int64_t n, std::complex* a, int64_t lda, float* d, float* e, + std::complex* tauq, std::complex* taup) { + return LAPACKE_cgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + d, e, reinterpret_cast(tauq), + reinterpret_cast(taup)); } -inline int64_t gebrd(int64_t m, int64_t n, double *a, int64_t lda, double *d, double *e, - double *tauq, double *taup) { +inline int64_t gebrd(int64_t m, int64_t n, double* a, int64_t lda, double* d, double* e, + double* tauq, double* taup) { return LAPACKE_dgebrd(LAPACK_COL_MAJOR, m, n, a, lda, d, e, tauq, taup); } -inline int64_t gebrd(int64_t m, int64_t n, float *a, int64_t lda, float *d, float *e, float *tauq, - float *taup) { +inline int64_t gebrd(int64_t m, int64_t n, float* a, int64_t lda, float* d, float* e, float* tauq, + float* taup) { return LAPACKE_sgebrd(LAPACK_COL_MAJOR, m, n, a, lda, d, e, tauq, taup); } -inline int64_t gebrd(int64_t m, int64_t n, std::complex *a, int64_t lda, double *d, - double *e, std::complex *tauq, std::complex *taup) { - return LAPACKE_zgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - d, e, reinterpret_cast(tauq), - reinterpret_cast(taup)); +inline int64_t gebrd(int64_t m, int64_t n, std::complex* a, int64_t lda, double* d, + double* e, std::complex* tauq, std::complex* taup) { + return LAPACKE_zgebrd(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + d, e, reinterpret_cast(tauq), + reinterpret_cast(taup)); } -inline 
int64_t geqrf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_cgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t geqrf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_cgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t geqrf(int64_t m, int64_t n, double *a, int64_t lda, double *tau) { +inline int64_t geqrf(int64_t m, int64_t n, double* a, int64_t lda, double* tau) { return LAPACKE_dgeqrf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t geqrf(int64_t m, int64_t n, float *a, int64_t lda, float *tau) { +inline int64_t geqrf(int64_t m, int64_t n, float* a, int64_t lda, float* tau) { return LAPACKE_sgeqrf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t geqrf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_zgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t geqrf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_zgeqrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t gerqf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_cgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t gerqf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_cgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t gerqf(int64_t m, int64_t n, double *a, int64_t lda, double *tau) { +inline int64_t gerqf(int64_t m, int64_t n, double* a, int64_t lda, double* tau) { return LAPACKE_dgerqf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t gerqf(int64_t m, int64_t n, float *a, int64_t lda, float *tau) { +inline int64_t gerqf(int64_t m, int64_t n, float* a, 
int64_t lda, float* tau) { return LAPACKE_sgerqf(LAPACK_COL_MAJOR, m, n, a, lda, tau); } -inline int64_t gerqf(int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau) { - return LAPACKE_zgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(tau)); +inline int64_t gerqf(int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau) { + return LAPACKE_zgerqf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n, - std::complex *a, int64_t lda, float *s, std::complex *u, - int64_t ldu, std::complex *vt, int64_t ldvt, float *superb) { + std::complex* a, int64_t lda, float* s, std::complex* u, + int64_t ldu, std::complex* vt, int64_t ldvt, float* superb) { return LAPACKE_cgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, - reinterpret_cast(a), lda, s, - reinterpret_cast(u), ldu, - reinterpret_cast(vt), ldvt, superb); + reinterpret_cast(a), lda, s, + reinterpret_cast(u), ldu, + reinterpret_cast(vt), ldvt, superb); } inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n, - double *a, int64_t lda, double *s, double *u, int64_t ldu, double *vt, - int64_t ldvt, double *superb) { + double* a, int64_t lda, double* s, double* u, int64_t ldu, double* vt, + int64_t ldvt, double* superb) { return LAPACKE_dgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, superb); } inline int64_t gesvd(oneapi::mkl::jobsvd jobu, oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n, - float *a, int64_t lda, float *s, float *u, int64_t ldu, float *vt, - int64_t ldvt, float *superb) { + float* a, int64_t lda, float* s, float* u, int64_t ldu, float* vt, + int64_t ldvt, float* superb) { return LAPACKE_sgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, a, lda, s, u, ldu, vt, ldvt, superb); } inline int64_t gesvd(oneapi::mkl::jobsvd jobu, 
oneapi::mkl::jobsvd jobvt, int64_t m, int64_t n, - std::complex *a, int64_t lda, double *s, std::complex *u, - int64_t ldu, std::complex *vt, int64_t ldvt, double *superb) { + std::complex* a, int64_t lda, double* s, std::complex* u, + int64_t ldu, std::complex* vt, int64_t ldvt, double* superb) { return LAPACKE_zgesvd(LAPACK_COL_MAJOR, to_char(jobu), to_char(jobvt), m, n, - reinterpret_cast(a), lda, s, - reinterpret_cast(u), ldu, - reinterpret_cast(vt), ldvt, superb); + reinterpret_cast(a), lda, s, + reinterpret_cast(u), ldu, + reinterpret_cast(vt), ldvt, superb); } -inline int64_t getrf(int64_t m, int64_t n, std::complex *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, std::complex* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_cgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t getrf(int64_t m, int64_t n, double *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_dgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, double* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_dgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); } -inline int64_t getrf(int64_t m, int64_t n, float *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_sgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, float* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_sgetrf(LAPACK_COL_MAJOR, m, n, a, lda, reinterpret_cast(ipiv)); } -inline int64_t getrf(int64_t m, int64_t n, std::complex *a, int64_t lda, int64_t *ipiv) { - return LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); +inline int64_t getrf(int64_t m, int64_t n, std::complex* a, int64_t lda, int64_t* ipiv) { + return LAPACKE_zgetrf(LAPACK_COL_MAJOR, m, n, reinterpret_cast(a), lda, + 
reinterpret_cast(ipiv)); } -inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex *a, - int64_t lda, float *w) { +inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex* a, + int64_t lda, float* w) { return LAPACKE_cheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, w); + reinterpret_cast(a), lda, w); } -inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex *a, - int64_t lda, double *w) { +inline int64_t heevd(oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, std::complex* a, + int64_t lda, double* w) { return LAPACKE_zheevd(LAPACK_COL_MAJOR, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, w); + reinterpret_cast(a), lda, w); } inline int64_t hegvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, - std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - float *w) { + std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + float* w) { return LAPACKE_chegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, w); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, w); } inline int64_t hegvd(int64_t itype, oneapi::mkl::job j, oneapi::mkl::uplo u, int64_t n, - std::complex *a, int64_t lda, std::complex *b, int64_t ldb, - double *w) { + std::complex* a, int64_t lda, std::complex* b, int64_t ldb, + double* w) { return LAPACKE_zhegvd(LAPACK_COL_MAJOR, itype, to_char(j), to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb, w); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb, w); } -inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, float *d, - float *e, std::complex *tau) { +inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, float* d, + float* e, std::complex* tau) { return LAPACKE_chetrd(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, d, e, - 
reinterpret_cast(tau)); + reinterpret_cast(a), lda, d, e, + reinterpret_cast(tau)); } -inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - double *d, double *e, std::complex *tau) { +inline int64_t hetrd(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + double* d, double* e, std::complex* tau) { return LAPACKE_zhetrd(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, d, e, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, d, e, + reinterpret_cast(tau)); } -inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_chetrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t hetrf(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_zhetrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - const std::complex *tau) { +inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + const std::complex* tau) { return LAPACKE_cungtr(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } -inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - const std::complex *tau) { +inline int64_t ungtr(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + const std::complex* tau) { return LAPACKE_zungtr(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + 
reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t unmtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, double *a, int64_t lda, const double *tau) { +inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, const double* tau) { return LAPACKE_dorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau); } -inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, float *a, int64_t lda, const float *tau) { +inline int64_t orgtr(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgtr(LAPACK_COL_MAJOR, to_char(u), n, a, lda, tau); } inline int64_t ormtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, float *a, int64_t lda, const float *tau, float *c, + int64_t m, int64_t n, float* a, int64_t lda, const float* 
tau, float* c, int64_t ldc) { return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } inline int64_t ormtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, double *a, int64_t lda, const double *tau, double *c, + int64_t m, int64_t n, double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, float *a, int64_t lda, const float *tau, float *c, + int64_t m, int64_t n, float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return LAPACKE_sormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, double *a, int64_t lda, const double *tau, double *c, + int64_t m, int64_t n, double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, a, lda, tau, c, ldc); } inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, std::complex *a, int64_t lda, - std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t or_un_mtr(oneapi::mkl::side side, oneapi::mkl::uplo u, oneapi::mkl::transpose trans, - int64_t m, int64_t n, std::complex *a, 
int64_t lda, - std::complex *tau, std::complex *c, int64_t ldc) { + int64_t m, int64_t n, std::complex* a, int64_t lda, + std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmtr(LAPACK_COL_MAJOR, to_char(side), to_char(u), to_char(trans), m, n, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, float *a, int64_t lda, float *d, float *e, - float *tau) { +inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, float* d, float* e, + float* tau) { return LAPACKE_ssytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau); } -inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, double *a, int64_t lda, double *d, double *e, - double *tau) { +inline int64_t sytrd(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, double* d, double* e, + double* tau) { return LAPACKE_dsytrd(LAPACK_COL_MAJOR, to_char(u), n, a, lda, d, e, tau); } -inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, float *a, int64_t lda, int64_t *ipiv) { +inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, float* a, int64_t lda, int64_t* ipiv) { return LAPACKE_ssytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda, - reinterpret_cast(ipiv)); + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, double *a, int64_t lda, int64_t *ipiv) { +inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, double* a, int64_t lda, int64_t* ipiv) { return LAPACKE_dsytrf(LAPACK_COL_MAJOR, to_char(u), n, a, lda, - reinterpret_cast(ipiv)); + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_csytrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), 
lda, + reinterpret_cast(ipiv)); } -inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex *a, int64_t lda, - int64_t *ipiv) { +inline int64_t sytrf(oneapi::mkl::uplo u, int64_t n, std::complex* a, int64_t lda, + int64_t* ipiv) { return LAPACKE_zsytrf(LAPACK_COL_MAJOR, to_char(u), n, - reinterpret_cast(a), lda, - reinterpret_cast(ipiv)); + reinterpret_cast(a), lda, + reinterpret_cast(ipiv)); } -inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, double *a, - int64_t lda, const double *tau) { +inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, double* a, + int64_t lda, const double* tau) { LAPACKE_dorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau); } -inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, float *a, - int64_t lda, const float *tau) { +inline void orgbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, float* a, + int64_t lda, const float* tau) { LAPACKE_sorgbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, float *a, int64_t lda, const float *tau) { +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgqr(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, double *a, int64_t lda, - const double *tau) { +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, double* a, int64_t lda, + const double* tau) { return LAPACKE_dorgqr(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_cungqr(LAPACK_COL_MAJOR, m, n, k, 
reinterpret_cast(a), + lda, reinterpret_cast(tau)); } -inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_gqr(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_zungqr(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const float *a, int64_t lda, const float *tau, float *c, + int64_t k, const float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return LAPACKE_sormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const double *a, int64_t lda, const double *tau, double *c, + int64_t k, const double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t or_un_mqr(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + 
const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmqr(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, float *a, int64_t lda, const float *tau) { +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, float* a, int64_t lda, const float* tau) { return LAPACKE_sorgrq(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, double *a, int64_t lda, - const double *tau) { +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, double* a, int64_t lda, + const double* tau) { return LAPACKE_dorgrq(LAPACK_COL_MAJOR, m, n, k, a, lda, tau); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_cungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_cungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } -inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex *a, int64_t lda, - const std::complex *tau) { - return LAPACKE_zungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), - lda, reinterpret_cast(tau)); +inline int64_t or_un_grq(int64_t m, int64_t n, int64_t k, std::complex* a, int64_t lda, + const std::complex* tau) { + return LAPACKE_zungrq(LAPACK_COL_MAJOR, m, n, k, reinterpret_cast(a), + lda, reinterpret_cast(tau)); } inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const float *a, int64_t lda, const float *tau, float *c, + int64_t k, const float* a, int64_t lda, const float* tau, float* c, int64_t ldc) { return 
LAPACKE_sormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const double *a, int64_t lda, const double *tau, double *c, + int64_t k, const double* a, int64_t lda, const double* tau, double* c, int64_t ldc) { return LAPACKE_dormrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, a, lda, tau, c, ldc); } inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_cunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } inline int64_t or_un_mrq(oneapi::mkl::side s, oneapi::mkl::transpose t, int64_t m, int64_t n, - int64_t k, const std::complex *a, int64_t lda, - const std::complex *tau, std::complex *c, int64_t ldc) { + int64_t k, const std::complex* a, int64_t lda, + const std::complex* tau, std::complex* c, int64_t ldc) { return LAPACKE_zunmrq(LAPACK_COL_MAJOR, to_char(s), to_char(t), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau), - reinterpret_cast(c), ldc); + reinterpret_cast(a), lda, + reinterpret_cast(tau), + reinterpret_cast(c), ldc); } -inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_cpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, double *a, int64_t lda) { +inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, 
double* a, int64_t lda) { return LAPACKE_dpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, float *a, int64_t lda) { +inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, float* a, int64_t lda) { return LAPACKE_spotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potrf(oneapi::mkl::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_zpotrf(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, - const std::complex *a, int64_t lda, std::complex *b, + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { return LAPACKE_cpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const double *a, - int64_t lda, double *b, int64_t ldb) { +inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const double* a, + int64_t lda, double* b, int64_t ldb) { return LAPACKE_dpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb); } -inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const float *a, - int64_t lda, float *b, int64_t ldb) { +inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, const float* a, + int64_t lda, float* b, int64_t ldb) { return LAPACKE_spotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, a, lda, b, ldb); } inline int64_t potrs(oneapi::mkl::uplo upper_lower, int64_t n, int64_t nrhs, - const std::complex *a, int64_t lda, std::complex *b, + const std::complex* a, int64_t lda, std::complex* b, int64_t ldb) { return 
LAPACKE_zpotrs(LAPACK_COL_MAJOR, to_char(upper_lower), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } -inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_cpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, double *a, int64_t lda) { +inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, double* a, int64_t lda) { return LAPACKE_dpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, float *a, int64_t lda) { +inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, float* a, int64_t lda) { return LAPACKE_spotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, a, lda); } -inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex *a, +inline int64_t potri(oneapi::mkl::uplo upper_lower, int64_t n, std::complex* a, int64_t lda) { return LAPACKE_zpotri(LAPACK_COL_MAJOR, to_char(upper_lower), n, - reinterpret_cast(a), lda); + reinterpret_cast(a), lda); } -inline int64_t laswp(int64_t n, std::complex *a, int64_t lda, int64_t k1, int64_t k2, - const int64_t *ipiv, int64_t incx) { - return LAPACKE_claswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, - k2, reinterpret_cast(ipiv), incx); +inline int64_t laswp(int64_t n, std::complex* a, int64_t lda, int64_t k1, int64_t k2, + const int64_t* ipiv, int64_t incx) { + return LAPACKE_claswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, + k2, reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, double *a, int64_t lda, int64_t k1, int64_t k2, const int64_t *ipiv, +inline int64_t laswp(int64_t n, double* a, int64_t lda, int64_t k1, int64_t k2, const int64_t* ipiv, int64_t incx) { return 
LAPACKE_dlaswp(LAPACK_COL_MAJOR, n, a, lda, k1, k2, - reinterpret_cast(ipiv), incx); + reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, float *a, int64_t lda, int64_t k1, int64_t k2, const int64_t *ipiv, +inline int64_t laswp(int64_t n, float* a, int64_t lda, int64_t k1, int64_t k2, const int64_t* ipiv, int64_t incx) { return LAPACKE_slaswp(LAPACK_COL_MAJOR, n, a, lda, k1, k2, - reinterpret_cast(ipiv), incx); + reinterpret_cast(ipiv), incx); } -inline int64_t laswp(int64_t n, std::complex *a, int64_t lda, int64_t k1, int64_t k2, - const int64_t *ipiv, int64_t incx) { - return LAPACKE_zlaswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, - k1, k2, reinterpret_cast(ipiv), incx); +inline int64_t laswp(int64_t n, std::complex* a, int64_t lda, int64_t k1, int64_t k2, + const int64_t* ipiv, int64_t incx) { + return LAPACKE_zlaswp(LAPACK_COL_MAJOR, n, reinterpret_cast(a), lda, k1, + k2, reinterpret_cast(ipiv), incx); } inline void ungbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, - std::complex *a, int64_t lda, const std::complex *tau) { + std::complex* a, int64_t lda, const std::complex* tau) { LAPACKE_cungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline void ungbr(oneapi::mkl::generate vect, int64_t m, int64_t n, int64_t k, - std::complex *a, int64_t lda, const std::complex *tau) { + std::complex* a, int64_t lda, const std::complex* tau) { LAPACKE_zungbr(LAPACK_COL_MAJOR, to_char(vect), m, n, k, - reinterpret_cast(a), lda, - reinterpret_cast(tau)); + reinterpret_cast(a), lda, + reinterpret_cast(tau)); } inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - int64_t n, int64_t nrhs, const float *a, int64_t lda, float *b, int64_t ldb) { + int64_t n, int64_t nrhs, const float* a, int64_t lda, float* b, int64_t ldb) { return LAPACKE_strtrs(LAPACK_COL_MAJOR, to_char(uplo), 
to_char(trans), to_char(diag), n, nrhs, a, lda, b, ldb); } inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - int64_t n, int64_t nrhs, const double *a, int64_t lda, double *b, + int64_t n, int64_t nrhs, const double* a, int64_t lda, double* b, int64_t ldb) { return LAPACKE_dtrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, a, lda, b, ldb); } inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - int64_t n, int64_t nrhs, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { + int64_t n, int64_t nrhs, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_ctrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } inline int64_t trtrs(oneapi::mkl::uplo uplo, oneapi::mkl::transpose trans, oneapi::mkl::diag diag, - int64_t n, int64_t nrhs, const std::complex *a, int64_t lda, - std::complex *b, int64_t ldb) { + int64_t n, int64_t nrhs, const std::complex* a, int64_t lda, + std::complex* b, int64_t ldb) { return LAPACKE_ztrtrs(LAPACK_COL_MAJOR, to_char(uplo), to_char(trans), to_char(diag), n, nrhs, - reinterpret_cast(a), lda, - reinterpret_cast(b), ldb); + reinterpret_cast(a), lda, + reinterpret_cast(b), ldb); } } //namespace reference diff --git a/tests/unit_tests/main_test.cpp b/tests/unit_tests/main_test.cpp index 25277e4e0..7e2ad079a 100644 --- a/tests/unit_tests/main_test.cpp +++ b/tests/unit_tests/main_test.cpp @@ -114,42 +114,37 @@ int main(int argc, char** argv) { unique_devices.insert(dev.get_info()); #if !defined(ONEMKL_ENABLE_MKLCPU_BACKEND) && \ !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_CPU) && \ - !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) && \ - !defined(ONEMKL_ENABLE_NETLIB_BACKEND) + !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) && 
!defined(ONEMKL_ENABLE_NETLIB_BACKEND) if (dev.is_cpu()) continue; #endif -#if !defined(ONEMKL_ENABLE_MKLGPU_BACKEND) && \ - !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU) && \ - !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) +#if !defined(ONEMKL_ENABLE_MKLGPU_BACKEND) && \ + !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_INTEL_GPU) && !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == INTEL_ID) continue; #endif -#if !defined(ONEMKL_ENABLE_CUBLAS_BACKEND) && \ - !defined(ONEMKL_ENABLE_CURAND_BACKEND) && \ - !defined(ONEMKL_ENABLE_CUSOLVER_BACKEND) && \ - !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) && \ +#if !defined(ONEMKL_ENABLE_CUBLAS_BACKEND) && !defined(ONEMKL_ENABLE_CURAND_BACKEND) && \ + !defined(ONEMKL_ENABLE_CUSOLVER_BACKEND) && \ + !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_NVIDIA_GPU) && \ !defined(ONEMKL_ENABLE_CUFFT_BACKEND) && !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == NVIDIA_ID) continue; #endif -#if !defined(ONEMKL_ENABLE_ROCBLAS_BACKEND) && \ - !defined(ONEMKL_ENABLE_ROCRAND_BACKEND) && \ - !defined(ONEMKL_ENABLE_ROCSOLVER_BACKEND) && \ - !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU) && \ - !defined(ONEMKL_ENABLE_ROCFFT_BACKEND) && \ +#if !defined(ONEMKL_ENABLE_ROCBLAS_BACKEND) && !defined(ONEMKL_ENABLE_ROCRAND_BACKEND) && \ + !defined(ONEMKL_ENABLE_ROCSOLVER_BACKEND) && \ + !defined(ONEMKL_ENABLE_PORTBLAS_BACKEND_AMD_GPU) && !defined(ONEMKL_ENABLE_ROCFFT_BACKEND) && \ !defined(ONEMKL_ENABLE_PORTFFT_BACKEND) if (dev.is_gpu() && vendor_id == AMD_ID) continue; #endif -// clang-format off + // clang-format off #ifdef __HIPSYCL__ if (dev.is_accelerator()) #else if (!dev.is_accelerator()) -// clang-format on #endif local_devices.push_back(dev); + // clang-format on } } catch (std::exception const& e) { diff --git a/tests/unit_tests/rng/device/include/moments.hpp b/tests/unit_tests/rng/device/include/moments.hpp index 8acf20bf9..51fe22bcb 100644 --- a/tests/unit_tests/rng/device/include/moments.hpp +++ 
b/tests/unit_tests/rng/device/include/moments.hpp @@ -59,9 +59,8 @@ class moments_test { std::is_same_v< Distribution, oneapi::mkl::rng::device::poisson< - std::int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>)&&!queue - .get_device() - .has(sycl::aspect::fp64)) { + std::int32_t, oneapi::mkl::rng::device::poisson_method::devroye>>) && + !queue.get_device().has(sycl::aspect::fp64)) { status = test_skipped; return; } diff --git a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp index 33533255e..74c6ba503 100644 --- a/tests/unit_tests/rng/device/include/rng_device_test_common.hpp +++ b/tests/unit_tests/rng/device/include/rng_device_test_common.hpp @@ -34,10 +34,9 @@ #define N_GEN_SERVICE (N_ENGINES * N_PORTION) // defines for skip_ahead_ex tests -#define N_SKIP ((std::uint64_t)pow(2, 62)) -#define SKIP_TIMES ((std::int32_t)pow(2, 14)) -#define NUM_TO_SKIP \ - { 0, (std::uint64_t)pow(2, 12) } +#define N_SKIP ((std::uint64_t)pow(2, 62)) +#define SKIP_TIMES ((std::int32_t)pow(2, 14)) +#define NUM_TO_SKIP { 0, (std::uint64_t)pow(2, 12) } // Correctness checking. static inline bool check_equal_device(float x, float x_ref) { diff --git a/tests/unit_tests/rng/include/rng_test_common.hpp b/tests/unit_tests/rng/include/rng_test_common.hpp index d2bc4859f..2acfd784c 100644 --- a/tests/unit_tests/rng/include/rng_test_common.hpp +++ b/tests/unit_tests/rng/include/rng_test_common.hpp @@ -34,10 +34,9 @@ #define N_GEN_SERVICE (N_ENGINES * N_PORTION) // defines for skip_ahead_ex tests -#define N_SKIP ((std::uint64_t)pow(2, 62)) -#define SKIP_TIMES ((std::int32_t)pow(2, 14)) -#define NUM_TO_SKIP \ - { 0, (std::uint64_t)pow(2, 12) } +#define N_SKIP ((std::uint64_t)pow(2, 62)) +#define SKIP_TIMES ((std::int32_t)pow(2, 14)) +#define NUM_TO_SKIP { 0, (std::uint64_t)pow(2, 12) } // Correctness checking. 
static inline bool check_equal(float x, float x_ref) { diff --git a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp index 62b213100..675d8930a 100644 --- a/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp +++ b/tests/unit_tests/sparse_blas/include/common_sparse_reference.hpp @@ -55,9 +55,9 @@ inline T opVal(const T t, const bool isConj) { }; template -void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType *ja_t, fpType *a_t, - intType a_nrows, intType a_ncols, intType indexing, accIntType &ia, - accIntType &ja, accFpType &a, const bool structOnlyFlag = false) { +void do_csr_transpose(const oneapi::mkl::transpose opA, intType* ia_t, intType* ja_t, fpType* a_t, + intType a_nrows, intType a_ncols, intType indexing, accIntType& ia, + accIntType& ja, accFpType& a, const bool structOnlyFlag = false) { const bool isConj = (opA == oneapi::mkl::transpose::conjtrans); // initialize ia_t to zero @@ -105,7 +105,7 @@ void do_csr_transpose(const oneapi::mkl::transpose opA, intType *ia_t, intType * // Transpose the given sparse matrix if needed template -auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpType *a, +auto sparse_transpose_if_needed(const intType* ia, const intType* ja, const fpType* a, intType a_nrows, intType a_ncols, std::size_t nnz, intType indexing, oneapi::mkl::transpose transpose_val) { std::vector iopa; @@ -134,7 +134,7 @@ auto sparse_transpose_if_needed(const intType *ia, const intType *ja, const fpTy /// Reduce the leading dimension to the minimum and transpose the matrix if needed /// The outputted matrix always uses row major layout template -auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, std::size_t ld, +auto extract_dense_matrix(const fpType* x, std::size_t nrows, std::size_t ncols, std::size_t ld, oneapi::mkl::transpose transpose_val, oneapi::mkl::layout 
dense_matrix_layout) { const bool is_row_major = dense_matrix_layout == oneapi::mkl::layout::row_major; @@ -161,8 +161,8 @@ auto extract_dense_matrix(const fpType *x, std::size_t nrows, std::size_t ncols, /// Convert the sparse matrix in the given format to a dense matrix A in row major layout applied with A_view. template -std::vector sparse_to_dense(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, std::size_t a_nrows, +std::vector sparse_to_dense(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, std::size_t a_nrows, std::size_t a_ncols, std::size_t nnz, intType indexing, oneapi::mkl::transpose transpose_val, oneapi::mkl::sparse::matrix_view A_view) { diff --git a/tests/unit_tests/sparse_blas/include/test_common.hpp b/tests/unit_tests/sparse_blas/include/test_common.hpp index c11255a9a..628f55e2e 100644 --- a/tests/unit_tests/sparse_blas/include/test_common.hpp +++ b/tests/unit_tests/sparse_blas/include/test_common.hpp @@ -66,16 +66,16 @@ static std::vector> test_matrix_p oneapi::mkl::sparse::matrix_property::symmetric } }; -void print_error_code(sycl::exception const &e); +void print_error_code(sycl::exception const& e); // Catch asynchronous exceptions. struct exception_handler_t { void operator()(sycl::exception_list exceptions) { - for (std::exception_ptr const &e : exceptions) { + for (std::exception_ptr const& e : exceptions) { try { std::rethrow_exception(e); } - catch (sycl::exception const &e) { + catch (sycl::exception const& e) { std::cout << "Caught asynchronous SYCL exception:\n" << e.what() << std::endl; print_error_code(e); } @@ -86,7 +86,7 @@ struct exception_handler_t { struct UsmDeleter { sycl::queue q; UsmDeleter(sycl::queue _q) : q(_q) {} - void operator()(void *ptr) { + void operator()(void* ptr) { sycl::free(ptr, q); } }; @@ -99,14 +99,14 @@ auto malloc_device_uptr(sycl::queue q, std::size_t num_elts) { // SYCL buffer creation helper. 
template -sycl::buffer make_buffer(const vec &v) { +sycl::buffer make_buffer(const vec& v) { sycl::buffer buf(v.data(), sycl::range<1>(v.size())); return buf; } template -void copy_host_to_buffer(sycl::queue queue, const std::vector &src, sycl::buffer dst) { - queue.submit([&](sycl::handler &cgh) { +void copy_host_to_buffer(sycl::queue queue, const std::vector& src, sycl::buffer dst) { + queue.submit([&](sycl::handler& cgh) { auto dst_acc = dst.template get_access( cgh, sycl::range<1>(src.size())); cgh.copy(src.data(), dst_acc); @@ -168,7 +168,7 @@ struct rand_scalar> { }; template -void rand_vector(std::vector &v, std::size_t n) { +void rand_vector(std::vector& v, std::size_t n) { using fpRealType = typename complex_info::real_type; v.resize(n); rand_scalar rand; @@ -178,7 +178,7 @@ void rand_vector(std::vector &v, std::size_t n) { } template -void rand_matrix(std::vector &m, oneapi::mkl::layout layout_val, std::size_t nrows, +void rand_matrix(std::vector& m, oneapi::mkl::layout layout_val, std::size_t nrows, std::size_t ncols, std::size_t ld, oneapi::mkl::transpose transpose_val = oneapi::mkl::transpose::nontrans) { using fpRealType = typename complex_info::real_type; @@ -221,8 +221,8 @@ fpType generate_data(bool is_diag) { template intType generate_random_csr_matrix(const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = false) { intType nnz = 0; rand_scalar rand_density; @@ -272,8 +272,8 @@ intType generate_random_csr_matrix(const intType nrows, const intType ncols, template intType generate_random_coo_matrix(const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = 
false) { rand_scalar rand_density; @@ -315,8 +315,8 @@ intType generate_random_coo_matrix(const intType nrows, const intType ncols, template intType generate_random_matrix(sparse_matrix_format_t format, const intType nrows, const intType ncols, const double density_val, intType indexing, - std::vector &ia, std::vector &ja, - std::vector &a, bool is_symmetric, + std::vector& ia, std::vector& ja, + std::vector& a, bool is_symmetric, bool require_diagonal = false) { ia.clear(); ja.clear(); @@ -337,8 +337,8 @@ intType generate_random_matrix(sparse_matrix_format_t format, const intType nrow /// In CSR format, the elements within a row are shuffled without changing ia. /// In COO format, all the elements are shuffled. template -void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType *ia, - intType *ja, fpType *a, intType nnz, std::size_t nrows) { +void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intType* ia, + intType* ja, fpType* a, intType nnz, std::size_t nrows) { if (format == sparse_matrix_format_t::CSR) { for (std::size_t i = 0; i < nrows; ++i) { intType nnz_row = ia[i + 1] - ia[i]; @@ -367,8 +367,8 @@ void shuffle_sparse_matrix(sparse_matrix_format_t format, intType indexing, intT /// Initialize a sparse matrix specified by the given format template -void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, - oneapi::mkl::sparse::matrix_handle_t *p_smhandle, std::int64_t num_rows, +void init_sparse_matrix(sycl::queue& queue, sparse_matrix_format_t format, + oneapi::mkl::sparse::matrix_handle_t* p_smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { if (format == sparse_matrix_format_t::CSR) { @@ -387,7 +387,7 @@ void init_sparse_matrix(sycl::queue &queue, sparse_matrix_format_t format, /// Reset the data of a sparse matrix specified by the given format template -void 
set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, +void set_matrix_data(sycl::queue& queue, sparse_matrix_format_t format, oneapi::mkl::sparse::matrix_handle_t smhandle, std::int64_t num_rows, std::int64_t num_cols, std::int64_t nnz, oneapi::mkl::index_base index, ContainerIndexT rows, ContainerIndexT cols, ContainerValueT vals) { @@ -406,8 +406,8 @@ void set_matrix_data(sycl::queue &queue, sparse_matrix_format_t format, } template -inline void free_handles(sycl::queue &queue, const std::vector dependencies, - HandlesT &&... handles) { +inline void free_handles(sycl::queue& queue, const std::vector dependencies, + HandlesT&&... handles) { // Fold expression so that handles expands to each value one after the other. ( [&] { @@ -436,19 +436,19 @@ inline void free_handles(sycl::queue &queue, const std::vector depe } template -inline void free_handles(sycl::queue &queue, HandlesT &&... handles) { +inline void free_handles(sycl::queue& queue, HandlesT&&... handles) { free_handles(queue, {}, handles...); } template -inline void wait_and_free_handles(sycl::queue &queue, HandlesT &&... handles) { +inline void wait_and_free_handles(sycl::queue& queue, HandlesT&&... 
handles) { queue.wait(); free_handles(queue, handles...); } inline bool require_square_matrix( oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties) { + const std::set& matrix_properties) { const bool is_symmetric = matrix_properties.find(oneapi::mkl::sparse::matrix_property::symmetric) != matrix_properties.cend(); @@ -457,7 +457,7 @@ inline bool require_square_matrix( template bool check_equal(fpType x, fpType x_ref, double abs_error_margin, double rel_error_margin, - std::ostream &out) { + std::ostream& out) { using fpRealType = typename complex_info::real_type; static_assert(std::is_floating_point_v, "Expected floating-point real or complex type."); @@ -478,8 +478,8 @@ bool check_equal(fpType x, fpType x_ref, double abs_error_margin, double rel_err } template -bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_error_factor = 10.0, - double rel_error_factor = 200.0, std::ostream &out = std::cout) { +bool check_equal_vector(const vecType1& v, const vecType2& v_ref, double abs_error_factor = 10.0, + double rel_error_factor = 200.0, std::ostream& out = std::cout) { using T = typename vecType2::value_type; std::size_t n = v.size(); if (n != v_ref.size()) { @@ -492,7 +492,7 @@ bool check_equal_vector(const vecType1 &v, const vecType2 &v_ref, double abs_err auto max_norm_ref = *std::max_element(std::begin(v_ref), std::end(v_ref), - [](const T &a, const T &b) { return std::abs(a) < std::abs(b); }); + [](const T& a, const T& b) { return std::abs(a) < std::abs(b); }); // Heuristic for the average-case error margins double abs_error_margin = abs_error_factor * std::abs(max_norm_ref) * std::log2(static_cast(n)); diff --git a/tests/unit_tests/sparse_blas/include/test_spmm.hpp b/tests/unit_tests/sparse_blas/include/test_spmm.hpp index 6188d4268..17874cd63 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmm.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmm.hpp @@ -53,11 +53,11 @@ */ template void 
test_helper_with_format_with_transpose( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, - oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, + oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, int& num_passed, + int& num_skipped) { double density_A_matrix = 0.8; fpType fp_zero = set_fp_value()(0.f, 0.f); fpType fp_one = set_fp_value()(1.f, 0.f); @@ -217,10 +217,10 @@ void test_helper_with_format_with_transpose( */ template void test_helper_with_format( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, int& num_passed, + int& num_skipped) { std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans }; @@ -245,7 +245,7 @@ void test_helper_with_format( */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, int &num_passed, int &num_skipped) { + sycl::device* dev, int& num_passed, int& num_skipped) { test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, { oneapi::mkl::sparse::spmm_alg::no_optimize_alg, oneapi::mkl::sparse::spmm_alg::csr_alg1, @@ -261,14 +261,14 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spmm reference as a dense operation template -void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *ia, - const intType 
*ja, const fpType *a, intType a_nrows, +void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, intType a_nrows, intType a_ncols, intType c_ncols, intType a_nnz, intType indexing, oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose opA, oneapi::mkl::transpose opB, fpType alpha, fpType beta, intType ldb, intType ldc, - const fpType *b, oneapi::mkl::sparse::matrix_view A_view, - fpType *c_ref) { + const fpType* b, oneapi::mkl::sparse::matrix_view A_view, + fpType* c_ref) { std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); std::size_t c_ncols_u = static_cast(c_ncols); @@ -300,7 +300,7 @@ void prepare_reference_spmm_data(sparse_matrix_format_t format, const intType *i for (std::size_t i = 0; i < opa_ncols; i++) { acc += dense_opa[row * opa_ncols + i] * dense_opb[i * c_ncols_u + col]; } - fpType &c = c_ref[dense_linear_idx(row, col, ldc_u)]; + fpType& c = c_ref[dense_linear_idx(row, col, ldc_u)]; c = alpha * acc + beta * c; } } diff --git a/tests/unit_tests/sparse_blas/include/test_spmv.hpp b/tests/unit_tests/sparse_blas/include/test_spmv.hpp index f141db893..654a1bfd4 100644 --- a/tests/unit_tests/sparse_blas/include/test_spmv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spmv.hpp @@ -52,10 +52,10 @@ */ template void test_helper_with_format_with_transpose( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, - oneapi::mkl::transpose transpose_val, int &num_passed, int &num_skipped) { + const std::vector& non_default_algorithms, + oneapi::mkl::transpose transpose_val, int& num_passed, int& num_skipped) { double density_A_matrix = 0.8; fpType fp_zero = set_fp_value()(0.f, 0.f); fpType fp_one = set_fp_value()(1.f, 0.f); @@ -212,10 +212,10 @@ void 
test_helper_with_format_with_transpose( */ template void test_helper_with_format( - testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device *dev, + testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, sycl::device* dev, sparse_matrix_format_t format, - const std::vector &non_default_algorithms, int &num_passed, - int &num_skipped) { + const std::vector& non_default_algorithms, int& num_passed, + int& num_skipped) { std::vector transpose_vals{ oneapi::mkl::transpose::nontrans, oneapi::mkl::transpose::trans, oneapi::mkl::transpose::conjtrans }; @@ -238,7 +238,7 @@ void test_helper_with_format( */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, int &num_passed, int &num_skipped) { + sycl::device* dev, int& num_passed, int& num_skipped) { test_helper_with_format( test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, { oneapi::mkl::sparse::spmv_alg::no_optimize_alg, oneapi::mkl::sparse::spmv_alg::csr_alg1, @@ -253,12 +253,12 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spmv reference as a dense operation template -void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, intType a_nrows, +void prepare_reference_spmv_data(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, intType a_nrows, intType a_ncols, intType a_nnz, intType indexing, oneapi::mkl::transpose opA, fpType alpha, fpType beta, - const fpType *x, oneapi::mkl::sparse::matrix_view A_view, - fpType *y_ref) { + const fpType* x, oneapi::mkl::sparse::matrix_view A_view, + fpType* y_ref) { std::size_t a_nrows_u = static_cast(a_nrows); std::size_t a_ncols_u = static_cast(a_ncols); auto [opa_nrows, opa_ncols] = swap_if_transposed(opA, a_nrows_u, a_ncols_u); diff --git a/tests/unit_tests/sparse_blas/include/test_spsv.hpp 
b/tests/unit_tests/sparse_blas/include/test_spsv.hpp index bdf9210f8..032a0875b 100644 --- a/tests/unit_tests/sparse_blas/include/test_spsv.hpp +++ b/tests/unit_tests/sparse_blas/include/test_spsv.hpp @@ -48,9 +48,9 @@ */ template void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, sparse_matrix_format_t format, - oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { + sycl::device* dev, sparse_matrix_format_t format, + oneapi::mkl::transpose transpose_val, int& num_passed, + int& num_skipped) { double density_A_matrix = 0.144; fpType alpha = set_fp_value()(1.f, 0.f); int m = 277; @@ -158,8 +158,8 @@ void test_helper_with_format(testFunctorI32 test_functor_i32, testFunctorI64 tes */ template void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i64, - sycl::device *dev, oneapi::mkl::transpose transpose_val, int &num_passed, - int &num_skipped) { + sycl::device* dev, oneapi::mkl::transpose transpose_val, int& num_passed, + int& num_skipped) { test_helper_with_format(test_functor_i32, test_functor_i64, dev, sparse_matrix_format_t::CSR, transpose_val, num_passed, num_skipped); @@ -170,11 +170,11 @@ void test_helper(testFunctorI32 test_functor_i32, testFunctorI64 test_functor_i6 /// Compute spsv reference as a dense operation template -void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType *ia, - const intType *ja, const fpType *a, intType m, intType nnz, - intType indexing, oneapi::mkl::transpose opA, const fpType *x, +void prepare_reference_spsv_data(sparse_matrix_format_t format, const intType* ia, + const intType* ja, const fpType* a, intType m, intType nnz, + intType indexing, oneapi::mkl::transpose opA, const fpType* x, fpType alpha, oneapi::mkl::sparse::matrix_view A_view, - fpType *y_ref) { + fpType* y_ref) { std::size_t mu = static_cast(m); auto dense_opa = sparse_to_dense(format, ia, ja, a, mu, mu, static_cast(nnz), indexing, 
opA, A_view); diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp index b6f9e1185..0d95630bf 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_buffer.cpp @@ -23,18 +23,18 @@ #include "test_spmm.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmm(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -154,13 +154,13 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, A_view, A_handle, B_handle, &beta, C_handle, alg, descr); } } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); if (descr) { sycl::event ev_release_descr; @@ -170,7 +170,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during 
execution of sparse SPMM:\n" << error.what() << std::endl; return 0; } @@ -190,7 +190,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmmBufferTests : public ::testing::TestWithParam {}; +class SparseSpmmBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmmBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp index 5778430a6..3f09594eb 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmm_usm.cpp @@ -23,18 +23,18 @@ #include "test_spmm.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmm(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, intType ncols_C, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::layout dense_matrix_layout, oneapi::mkl::transpose transpose_A, oneapi::mkl::transpose transpose_B, fpType alpha, fpType beta, intType ldb, intType ldc, oneapi::mkl::sparse::spmm_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); @@ -82,11 +82,11 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *b_usm = b_usm_uptr.get(); - fpType *c_usm = c_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm 
= a_usm_uptr.get(); + fpType* b_usm = b_usm_uptr.get(); + fpType* c_usm = c_usm_uptr.get(); std::vector mat_dependencies; std::vector spmm_dependencies; @@ -102,8 +102,8 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmm_dependencies.push_back( main_queue.memcpy(c_usm, c_host.data(), c_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α - fpType *beta_host_or_usm_ptr = β + fpType* alpha_host_or_usm_ptr = α + fpType* beta_host_or_usm_ptr = β if (test_scalar_on_device) { spmm_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -199,13 +199,13 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ev_copy = main_queue.memcpy(c_host.data(), c_usm, c_host.size() * sizeof(fpType), ev_spmm); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMM:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, B_handle, C_handle); if (descr) { sycl::event ev_release_descr; @@ -215,7 +215,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMM:\n" << error.what() << std::endl; return 0; } @@ -238,7 +238,7 @@ int test_spmm(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmmUsmTests : public ::testing::TestWithParam {}; +class SparseSpmmUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmmUsmTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp 
b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp index 3d99f9e94..1864f6065 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_buffer.cpp @@ -23,16 +23,16 @@ #include "test_spmv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmv(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -143,13 +143,13 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, A_handle, x_handle, &beta, y_handle, alg, descr); } } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -159,7 +159,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; return 0; } @@ -178,7 +178,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class 
SparseSpmvBufferTests : public ::testing::TestWithParam {}; +class SparseSpmvBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmvBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp index ded92a770..b24a6e0ee 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spmv_usm.cpp @@ -23,16 +23,16 @@ #include "test_spmv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, +int test_spmv(sycl::device* dev, sparse_matrix_format_t format, intType nrows_A, intType ncols_A, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, fpType beta, oneapi::mkl::sparse::spmv_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); @@ -75,11 +75,11 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); auto beta_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm = a_usm_uptr.get(); + fpType* x_usm = x_usm_uptr.get(); + fpType* y_usm = y_usm_uptr.get(); std::vector mat_dependencies; std::vector spmv_dependencies; @@ -95,8 +95,8 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, spmv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), 
y_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α - fpType *beta_host_or_usm_ptr = β + fpType* alpha_host_or_usm_ptr = α + fpType* beta_host_or_usm_ptr = β if (test_scalar_on_device) { spmv_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -191,13 +191,13 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() * sizeof(fpType), ev_spmv); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPMV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -207,7 +207,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPMV:\n" << error.what() << std::endl; return 0; } @@ -229,7 +229,7 @@ int test_spmv(sycl::device *dev, sparse_matrix_format_t format, intType nrows_A, return static_cast(valid); } -class SparseSpmvUsmTests : public ::testing::TestWithParam {}; +class SparseSpmvUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpmvUsmTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp index 6b276dff4..ca5689d13 100644 --- a/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_buffer.cpp @@ -23,15 +23,15 @@ #include "test_spsv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int 
test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, +int test_spsv(sycl::device* dev, sparse_matrix_format_t format, intType m, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { if (test_scalar_on_device) { // Scalars on the device is not planned to be supported with the buffer API @@ -141,13 +141,13 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl A_handle, x_handle, y_handle, alg, descr); } } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -157,7 +157,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; return 0; } @@ -176,7 +176,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl return static_cast(valid); } -class SparseSpsvBufferTests : public ::testing::TestWithParam {}; +class SparseSpsvBufferTests : public ::testing::TestWithParam {}; TEST_P(SparseSpsvBufferTests, RealSinglePrecision) { using fpType = float; diff --git a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp index 3b58db914..7a43a7112 100644 --- 
a/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp +++ b/tests/unit_tests/sparse_blas/source/sparse_spsv_usm.cpp @@ -23,15 +23,15 @@ #include "test_spsv.hpp" -extern std::vector devices; +extern std::vector devices; namespace { template -int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, double density_A_matrix, +int test_spsv(sycl::device* dev, sparse_matrix_format_t format, intType m, double density_A_matrix, oneapi::mkl::index_base index, oneapi::mkl::transpose transpose_val, fpType alpha, oneapi::mkl::sparse::spsv_alg alg, oneapi::mkl::sparse::matrix_view A_view, - const std::set &matrix_properties, + const std::set& matrix_properties, bool reset_data, bool test_scalar_on_device) { sycl::queue main_queue(*dev, exception_handler_t()); @@ -77,11 +77,11 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl auto y_usm_uptr = malloc_device_uptr(main_queue, y_host.size()); auto alpha_usm_uptr = malloc_device_uptr(main_queue, 1); - intType *ia_usm = ia_usm_uptr.get(); - intType *ja_usm = ja_usm_uptr.get(); - fpType *a_usm = a_usm_uptr.get(); - fpType *x_usm = x_usm_uptr.get(); - fpType *y_usm = y_usm_uptr.get(); + intType* ia_usm = ia_usm_uptr.get(); + intType* ja_usm = ja_usm_uptr.get(); + fpType* a_usm = a_usm_uptr.get(); + fpType* x_usm = x_usm_uptr.get(); + fpType* y_usm = y_usm_uptr.get(); std::vector mat_dependencies; std::vector spsv_dependencies; @@ -97,7 +97,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl spsv_dependencies.push_back( main_queue.memcpy(y_usm, y_host.data(), y_host.size() * sizeof(fpType))); - fpType *alpha_host_or_usm_ptr = α + fpType* alpha_host_or_usm_ptr = α if (test_scalar_on_device) { spsv_dependencies.push_back( main_queue.memcpy(alpha_usm_uptr.get(), &alpha, sizeof(fpType))); @@ -186,13 +186,13 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl ev_copy = main_queue.memcpy(y_host.data(), y_usm, y_host.size() 
* sizeof(fpType), ev_spsv); } - catch (const sycl::exception &e) { + catch (const sycl::exception& e) { std::cout << "Caught synchronous SYCL exception during sparse SPSV:\n" << e.what() << std::endl; print_error_code(e); return 0; } - catch (const oneapi::mkl::unimplemented &e) { + catch (const oneapi::mkl::unimplemented& e) { wait_and_free_handles(main_queue, A_handle, x_handle, y_handle); if (descr) { sycl::event ev_release_descr; @@ -202,7 +202,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl } return test_skipped; } - catch (const std::runtime_error &error) { + catch (const std::runtime_error& error) { std::cout << "Error raised during execution of sparse SPSV:\n" << error.what() << std::endl; return 0; } @@ -224,7 +224,7 @@ int test_spsv(sycl::device *dev, sparse_matrix_format_t format, intType m, doubl return static_cast(valid); } -class SparseSpsvUsmTests : public ::testing::TestWithParam {}; +class SparseSpsvUsmTests : public ::testing::TestWithParam {}; TEST_P(SparseSpsvUsmTests, RealSinglePrecision) { using fpType = float;